rust_htslib/bcf/
record.rs

1// Copyright 2014 Johannes Köster.
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6use std::borrow::{Borrow, BorrowMut};
7use std::f32;
8use std::ffi;
9use std::fmt;
10use std::i32;
11use std::marker::PhantomData;
12use std::ops::Deref;
13use std::os::raw::c_char;
14use std::ptr;
15use std::rc::Rc;
16use std::slice;
17use std::str;
18
19use bio_types::genome;
20use derive_new::new;
21use ieee754::Ieee754;
22use lazy_static::lazy_static;
23
24use crate::bcf::header::{HeaderView, Id};
25use crate::bcf::Error;
26use crate::errors::Result;
27use crate::htslib;
28
29const MISSING_INTEGER: i32 = i32::MIN;
30const VECTOR_END_INTEGER: i32 = i32::MIN + 1;
31
32lazy_static! {
33    static ref MISSING_FLOAT: f32 = Ieee754::from_bits(0x7F80_0001);
34    static ref VECTOR_END_FLOAT: f32 = Ieee754::from_bits(0x7F80_0002);
35}
36
/// Behaviour shared by the numeric value types of INFO and FORMAT entries.
pub trait Numeric {
    /// Produce the value that represents "missing" when stored in a BCF record.
    fn missing() -> Self;

    /// Tell whether this entry is the missing value.
    fn is_missing(&self) -> bool;
}
45
46impl Numeric for f32 {
47    fn is_missing(&self) -> bool {
48        self.bits() == MISSING_FLOAT.bits()
49    }
50
51    fn missing() -> f32 {
52        *MISSING_FLOAT
53    }
54}
55
56impl Numeric for i32 {
57    fn is_missing(&self) -> bool {
58        *self == MISSING_INTEGER
59    }
60
61    fn missing() -> i32 {
62        MISSING_INTEGER
63    }
64}
65
/// Crate-private helper for recognising the end-of-vector sentinel in numeric entries.
trait NumericUtils {
    /// Tell whether this entry is the sentinel that marks the end of the record's values.
    fn is_vector_end(&self) -> bool;
}
70
71impl NumericUtils for f32 {
72    fn is_vector_end(&self) -> bool {
73        self.bits() == VECTOR_END_FLOAT.bits()
74    }
75}
76
77impl NumericUtils for i32 {
78    fn is_vector_end(&self) -> bool {
79        *self == VECTOR_END_INTEGER
80    }
81}
82
83/// A trait to allow for seamless use of bytes or integer identifiers for filters
84pub trait FilterId {
85    fn id_from_header(&self, header: &HeaderView) -> Result<Id>;
86    fn is_pass(&self) -> bool;
87}
88
89impl FilterId for [u8] {
90    fn id_from_header(&self, header: &HeaderView) -> Result<Id> {
91        header.name_to_id(self)
92    }
93    fn is_pass(&self) -> bool {
94        matches!(self, b"PASS" | b".")
95    }
96}
97
98impl FilterId for Id {
99    fn id_from_header(&self, _header: &HeaderView) -> Result<Id> {
100        Ok(*self)
101    }
102    fn is_pass(&self) -> bool {
103        *self == Id(0)
104    }
105}
106
/// Storage handed to accessors of INFO or FORMAT data.
#[derive(Debug)]
pub struct Buffer {
    /// Raw pointer to the underlying memory; null while nothing has been stored.
    inner: *mut ::std::os::raw::c_void,
    /// Length counter that accompanies `inner`.
    len: i32,
}
113
114impl Buffer {
115    pub fn new() -> Self {
116        Buffer {
117            inner: ptr::null_mut(),
118            len: 0,
119        }
120    }
121}
122
123impl Drop for Buffer {
124    fn drop(&mut self) {
125        unsafe {
126            ::libc::free(self.inner as *mut ::libc::c_void);
127        }
128    }
129}
130
131#[derive(new, Debug)]
132pub struct BufferBacked<'a, T: 'a + fmt::Debug, B: Borrow<Buffer> + 'a> {
133    value: T,
134    buffer: B,
135    #[new(default)]
136    phantom: PhantomData<&'a B>,
137}
138
139impl<'a, T: 'a + fmt::Debug, B: Borrow<Buffer> + 'a> Deref for BufferBacked<'a, T, B> {
140    type Target = T;
141
142    fn deref(&self) -> &T {
143        &self.value
144    }
145}
146
147impl<'a, T: 'a + fmt::Debug + fmt::Display, B: Borrow<Buffer> + 'a> fmt::Display
148    for BufferBacked<'a, T, B>
149{
150    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
151        fmt::Display::fmt(&self.value, f)
152    }
153}
154
155/// A VCF/BCF record.
156/// New records can be created by the `empty_record` methods of [`bcf::Reader`](crate::bcf::Reader)
157/// and [`bcf::Writer`](crate::bcf::Writer).
158/// # Example
159/// ```rust
160/// use rust_htslib::bcf::{Format, Writer};
161/// use rust_htslib::bcf::header::Header;
162///
163/// // Create minimal VCF header with a single sample
164/// let mut header = Header::new();
165/// header.push_sample("sample".as_bytes());
166///
167/// // Write uncompressed VCF to stdout with above header and get an empty record
168/// let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
169/// let mut record = vcf.empty_record();
170/// ```
171#[derive(Debug)]
172pub struct Record {
173    pub inner: *mut htslib::bcf1_t,
174    header: Rc<HeaderView>,
175}
176
177impl Record {
178    /// Construct record with reference to header `HeaderView`, for create-internal use.
179    pub(crate) fn new(header: Rc<HeaderView>) -> Self {
180        let inner = unsafe {
181            let inner = htslib::bcf_init();
182            // Always unpack record.
183            htslib::bcf_unpack(inner, htslib::BCF_UN_ALL as i32);
184            inner
185        };
186        Record { inner, header }
187    }
188
189    /// Force unpacking of internal record values.
190    pub fn unpack(&mut self) {
191        unsafe { htslib::bcf_unpack(self.inner, htslib::BCF_UN_ALL as i32) };
192    }
193
194    /// Return associated header.
195    pub fn header(&self) -> &HeaderView {
196        self.header.as_ref()
197    }
198
199    /// Set the record header.
200    pub(crate) fn set_header(&mut self, header: Rc<HeaderView>) {
201        self.header = header;
202    }
203
204    /// Return reference to the inner C struct.
205    ///
206    /// # Remarks
207    ///
208    /// Note that this function is only required as long as Rust-Htslib does not provide full
209    /// access to all aspects of Htslib.
210    pub fn inner(&self) -> &htslib::bcf1_t {
211        unsafe { &*self.inner }
212    }
213
214    /// Return mutable reference to inner C struct.
215    ///
216    /// # Remarks
217    ///
218    /// Note that this function is only required as long as Rust-Htslib does not provide full
219    /// access to all aspects of Htslib.
220    pub fn inner_mut(&mut self) -> &mut htslib::bcf1_t {
221        unsafe { &mut *self.inner }
222    }
223
224    /// Get the reference id of the record.
225    ///
226    /// To look up the contig name,
227    /// use [`HeaderView::rid2name`](../header/struct.HeaderView.html#method.rid2name).
228    ///
229    /// # Returns
230    ///
231    /// - `Some(rid)` if the internal `rid` is set to a value that is not `-1`
232    /// - `None` if the internal `rid` is set to `-1`
233    pub fn rid(&self) -> Option<u32> {
234        match self.inner().rid {
235            -1 => None,
236            rid => Some(rid as u32),
237        }
238    }
239
240    /// Update the reference id of the record.
241    ///
242    /// To look up reference id for a contig name,
243    /// use [`HeaderView::name2rid`](../header/struct.HeaderView.html#method.name2rid).
244    ///
245    /// # Example
246    ///
247    /// Example assumes we have a Record `record` from a VCF with a header containing region
248    /// named `1`. See [module documentation](../index.html#example-writing) for how to set
249    /// up VCF, header, and record.
250    ///
251    /// ```
252    /// # use rust_htslib::bcf::{Format, Writer};
253    /// # use rust_htslib::bcf::header::Header;
254    /// # let mut header = Header::new();
255    /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
256    /// # header.push_record(header_contig_line.as_bytes());
257    /// # header.push_sample("test_sample".as_bytes());
258    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
259    /// # let mut record = vcf.empty_record();
260    /// let rid = record.header().name2rid(b"1").ok();
261    /// record.set_rid(rid);
262    /// assert_eq!(record.rid(), rid);
263    /// let name = record.header().rid2name(record.rid().unwrap()).ok();
264    /// assert_eq!(Some("1".as_bytes()), name);
265    /// ```
266    pub fn set_rid(&mut self, rid: Option<u32>) {
267        match rid {
268            Some(rid) => self.inner_mut().rid = rid as i32,
269            None => self.inner_mut().rid = -1,
270        }
271    }
272
273    /// Return **0-based** position
274    pub fn pos(&self) -> i64 {
275        self.inner().pos
276    }
277
278    /// Set **0-based** position
279    pub fn set_pos(&mut self, pos: i64) {
280        self.inner_mut().pos = pos;
281    }
282
283    /// Return the **0-based, exclusive** end position
284    ///
285    /// # Example
286    /// ```rust
287    /// # use rust_htslib::bcf::{Format, Header, Writer};
288    /// # use tempfile::NamedTempFile;
289    /// # let tmp = NamedTempFile::new().unwrap();
290    /// # let path = tmp.path();
291    /// # let header = Header::new();
292    /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
293    /// # let mut record = vcf.empty_record();
294    /// let alleles: &[&[u8]] = &[b"AGG", b"TG"];
295    /// record.set_alleles(alleles).expect("Failed to set alleles");
296    /// record.set_pos(5);
297    ///
298    /// assert_eq!(record.end(), 8)
299    /// ```
300    pub fn end(&self) -> i64 {
301        self.pos() + self.rlen()
302    }
303
304    /// Return the value of the ID column.
305    ///
306    /// When empty, returns `b".".to_vec()`.
307    pub fn id(&self) -> Vec<u8> {
308        if self.inner().d.id.is_null() {
309            b".".to_vec()
310        } else {
311            let id = unsafe { ffi::CStr::from_ptr(self.inner().d.id) };
312            id.to_bytes().to_vec()
313        }
314    }
315
316    /// Update the ID string to the given value.
317    pub fn set_id(&mut self, id: &[u8]) -> Result<()> {
318        let c_str = ffi::CString::new(id).unwrap();
319        if unsafe {
320            htslib::bcf_update_id(
321                self.header().inner,
322                self.inner,
323                c_str.as_ptr() as *mut c_char,
324            )
325        } == 0
326        {
327            Ok(())
328        } else {
329            Err(Error::BcfSetValues)
330        }
331    }
332
333    /// Clear the ID column (set it to `"."`).
334    pub fn clear_id(&mut self) -> Result<()> {
335        let c_str = ffi::CString::new(&b"."[..]).unwrap();
336        if unsafe {
337            htslib::bcf_update_id(
338                self.header().inner,
339                self.inner,
340                c_str.as_ptr() as *mut c_char,
341            )
342        } == 0
343        {
344            Ok(())
345        } else {
346            Err(Error::BcfSetValues)
347        }
348    }
349
350    /// Add the ID string (the ID field is semicolon-separated), checking for duplicates.
351    pub fn push_id(&mut self, id: &[u8]) -> Result<()> {
352        let c_str = ffi::CString::new(id).unwrap();
353        if unsafe {
354            htslib::bcf_add_id(
355                self.header().inner,
356                self.inner,
357                c_str.as_ptr() as *mut c_char,
358            )
359        } == 0
360        {
361            Ok(())
362        } else {
363            Err(Error::BcfSetValues)
364        }
365    }
366
367    /// Return `Filters` iterator for enumerating all filters that have been set.
368    ///
369    /// A record having the `PASS` filter will return an empty `Filter` here.
370    pub fn filters(&self) -> Filters<'_> {
371        Filters::new(self)
372    }
373
374    /// Query whether the filter with the given ID has been set.
375    ///
376    /// This method can be used to check if a record passes filtering by using either `Id(0)`,
377    /// `PASS` or `.`
378    ///
379    /// # Example
380    /// ```rust
381    /// # use rust_htslib::bcf::{Format, Header, Writer};
382    /// # use rust_htslib::bcf::header::Id;
383    /// # use tempfile::NamedTempFile;
384    /// # let tmp = tempfile::NamedTempFile::new().unwrap();
385    /// # let path = tmp.path();
386    /// let mut header = Header::new();
387    /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
388    /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
389    /// # let mut record = vcf.empty_record();
390    /// assert!(record.has_filter("PASS".as_bytes()));
391    /// assert!(record.has_filter(".".as_bytes()));
392    /// assert!(record.has_filter(&Id(0)));
393    ///
394    /// record.push_filter("foo".as_bytes()).unwrap();
395    /// assert!(record.has_filter("foo".as_bytes()));
396    /// assert!(!record.has_filter("PASS".as_bytes()))
397    /// ```
398    pub fn has_filter<T: FilterId + ?Sized>(&self, flt_id: &T) -> bool {
399        if flt_id.is_pass() && self.inner().d.n_flt == 0 {
400            return true;
401        }
402        let id = match flt_id.id_from_header(self.header()) {
403            Ok(i) => *i,
404            Err(_) => return false,
405        };
406        for i in 0..(self.inner().d.n_flt as isize) {
407            if unsafe { *self.inner().d.flt.offset(i) } == id as i32 {
408                return true;
409            }
410        }
411        false
412    }
413
414    /// Set the given filter IDs to the FILTER column.
415    ///
416    /// Setting an empty slice removes all filters and sets `PASS`.
417    ///
418    /// # Example
419    /// ```rust
420    /// # use rust_htslib::bcf::{Format, Header, Writer};
421    /// # use rust_htslib::bcf::header::Id;
422    /// # use tempfile::NamedTempFile;
423    /// # let tmp = tempfile::NamedTempFile::new().unwrap();
424    /// # let path = tmp.path();
425    /// let mut header = Header::new();
426    /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
427    /// header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
428    /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
429    /// # let mut record = vcf.empty_record();
430    /// let foo = record.header().name_to_id(b"foo").unwrap();
431    /// let bar = record.header().name_to_id(b"bar").unwrap();
432    /// assert!(record.has_filter("PASS".as_bytes()));
433    /// let mut filters = vec![&foo, &bar];
434    /// record.set_filters(&filters).unwrap();
435    /// assert!(record.has_filter(&foo));
436    /// assert!(record.has_filter(&bar));
437    /// assert!(!record.has_filter("PASS".as_bytes()));
438    /// filters.clear();
439    /// record.set_filters(&filters).unwrap();
440    /// assert!(record.has_filter("PASS".as_bytes()));
441    /// assert!(!record.has_filter("foo".as_bytes()));
442    /// // 'baz' isn't in the header
443    /// assert!(record.set_filters(&["baz".as_bytes()]).is_err())
444    /// ```
445    ///
446    /// # Errors
447    /// If any of the filter IDs do not exist in the header, an [`Error::BcfUnknownID`] is returned.
448    ///
449    pub fn set_filters<T: FilterId + ?Sized>(&mut self, flt_ids: &[&T]) -> Result<()> {
450        let mut ids: Vec<i32> = flt_ids
451            .iter()
452            .map(|id| id.id_from_header(self.header()).map(|id| *id as i32))
453            .collect::<Result<Vec<i32>>>()?;
454        unsafe {
455            htslib::bcf_update_filter(
456                self.header().inner,
457                self.inner,
458                ids.as_mut_ptr(),
459                ids.len() as i32,
460            );
461        };
462        Ok(())
463    }
464
465    /// Add the given filter to the FILTER column.
466    ///
467    /// If `flt_id` is `PASS` or `.` then all existing filters are removed first. Otherwise,
468    /// any existing `PASS` filter is removed.
469    ///
470    /// # Example
471    /// ```rust
472    /// # use rust_htslib::bcf::{Format, Header, Writer};
473    /// # use tempfile::NamedTempFile;
474    /// # let tmp = tempfile::NamedTempFile::new().unwrap();
475    /// # let path = tmp.path();
476    /// let mut header = Header::new();
477    /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
478    /// header.push_record(br#"##FILTER=<ID=bar,Description="dranks">"#);
479    /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
480    /// # let mut record = vcf.empty_record();
481    /// let foo = "foo".as_bytes();
482    /// let bar = record.header().name_to_id(b"bar").unwrap();
483    /// assert!(record.has_filter("PASS".as_bytes()));
484    ///
485    /// record.push_filter(foo).unwrap();
486    /// record.push_filter(&bar).unwrap();
487    /// assert!(record.has_filter(foo));
488    /// assert!(record.has_filter(&bar));
489    /// // filter must exist in the header
490    /// assert!(record.push_filter("baz".as_bytes()).is_err())
491    /// ```
492    ///
493    /// # Errors
494    /// If the `flt_id` does not exist in the header, an [`Error::BcfUnknownID`] is returned.
495    ///
496    pub fn push_filter<T: FilterId + ?Sized>(&mut self, flt_id: &T) -> Result<()> {
497        let id = flt_id.id_from_header(self.header())?;
498        unsafe {
499            htslib::bcf_add_filter(self.header().inner, self.inner, *id as i32);
500        };
501        Ok(())
502    }
503
504    /// Remove the given filter from the FILTER column.
505    ///
506    /// # Arguments
507    ///
508    /// - `flt_id` - The corresponding filter ID to remove.
509    /// - `pass_on_empty` - Set to `PASS` when removing the last filter.
510    ///
511    /// # Example
512    /// ```rust
513    /// # use rust_htslib::bcf::{Format, Header, Writer};
514    /// # use tempfile::NamedTempFile;
515    /// # let tmp = tempfile::NamedTempFile::new().unwrap();
516    /// # let path = tmp.path();
517    /// let mut header = Header::new();
518    /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
519    /// header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
520    /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
521    /// # let mut record = vcf.empty_record();
522    /// let foo = "foo".as_bytes();
523    /// let bar = "bar".as_bytes();
524    /// record.set_filters(&[foo, bar]).unwrap();
525    /// assert!(record.has_filter(foo));
526    /// assert!(record.has_filter(bar));
527    ///
528    /// record.remove_filter(foo, true).unwrap();
529    /// assert!(!record.has_filter(foo));
530    /// assert!(record.has_filter(bar));
531    /// // 'baz' is not in the header
532    /// assert!(record.remove_filter("baz".as_bytes(), true).is_err());
533    ///
534    /// record.remove_filter(bar, true).unwrap();
535    /// assert!(!record.has_filter(bar));
536    /// assert!(record.has_filter("PASS".as_bytes()));
537    /// ```
538    ///
539    /// # Errors
540    /// If the `flt_id` does not exist in the header, an [`Error::BcfUnknownID`] is returned.
541    ///
542    pub fn remove_filter<T: FilterId + ?Sized>(
543        &mut self,
544        flt_id: &T,
545        pass_on_empty: bool,
546    ) -> Result<()> {
547        let id = flt_id.id_from_header(self.header())?;
548        unsafe {
549            htslib::bcf_remove_filter(
550                self.header().inner,
551                self.inner,
552                *id as i32,
553                pass_on_empty as i32,
554            )
555        };
556        Ok(())
557    }
558
559    /// Get alleles strings.
560    ///
561    /// The first allele is the reference allele.
562    pub fn alleles(&self) -> Vec<&[u8]> {
563        unsafe { htslib::bcf_unpack(self.inner, htslib::BCF_UN_ALL as i32) };
564        let n = self.inner().n_allele() as usize;
565        let dec = self.inner().d;
566        let alleles = unsafe { slice::from_raw_parts(dec.allele, n) };
567        (0..n)
568            .map(|i| unsafe { ffi::CStr::from_ptr(alleles[i]).to_bytes() })
569            .collect()
570    }
571
572    /// Set alleles. The first allele is the reference allele.
573    ///
574    /// # Example
575    /// ```rust
576    /// # use rust_htslib::bcf::{Format, Writer};
577    /// # use rust_htslib::bcf::header::Header;
578    /// #
579    /// # // Create minimal VCF header with a single sample
580    /// # let mut header = Header::new();
581    /// # header.push_sample("sample".as_bytes());
582    /// #
583    /// # // Write uncompressed VCF to stdout with above header and get an empty record
584    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
585    /// # let mut record = vcf.empty_record();
586    /// assert_eq!(record.allele_count(), 0);
587    ///
588    /// let alleles: &[&[u8]] = &[b"A", b"TG"];
589    /// record.set_alleles(alleles).expect("Failed to set alleles");
590    /// assert_eq!(record.allele_count(), 2)
591    /// ```
592    pub fn set_alleles(&mut self, alleles: &[&[u8]]) -> Result<()> {
593        let cstrings: Vec<ffi::CString> = alleles
594            .iter()
595            .map(|vec| ffi::CString::new(*vec).unwrap())
596            .collect();
597        let mut ptrs: Vec<*const c_char> = cstrings
598            .iter()
599            .map(|cstr| cstr.as_ptr() as *const c_char)
600            .collect();
601        if unsafe {
602            htslib::bcf_update_alleles(
603                self.header().inner,
604                self.inner,
605                ptrs.as_mut_ptr(),
606                alleles.len() as i32,
607            )
608        } == 0
609        {
610            Ok(())
611        } else {
612            Err(Error::BcfSetValues)
613        }
614    }
615
616    /// Get variant quality.
617    pub fn qual(&self) -> f32 {
618        self.inner().qual
619    }
620
621    /// Set variant quality.
622    pub fn set_qual(&mut self, qual: f32) {
623        self.inner_mut().qual = qual;
624    }
625
626    pub fn info<'a>(&'a self, tag: &'a [u8]) -> Info<'a, Buffer> {
627        self.info_shared_buffer(tag, Buffer::new())
628    }
629
630    /// Get the value of the given info tag.
631    pub fn info_shared_buffer<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b>(
632        &'a self,
633        tag: &'a [u8],
634        buffer: B,
635    ) -> Info<'a, B> {
636        Info {
637            record: self,
638            tag,
639            buffer,
640        }
641    }
642
643    /// Get the number of samples in the record.
644    pub fn sample_count(&self) -> u32 {
645        self.inner().n_sample()
646    }
647
648    /// Get the number of alleles, including reference allele.
649    pub fn allele_count(&self) -> u32 {
650        self.inner().n_allele()
651    }
652
653    /// Add/replace genotypes in FORMAT GT tag.
654    ///
655    /// # Arguments
656    ///
657    /// - `genotypes` - a flattened, two-dimensional array of GenotypeAllele,
658    ///                 the first dimension contains one array for each sample.
659    ///
660    /// # Errors
661    ///
662    /// Returns error if GT tag is not present in header.
663    ///
664    /// # Example
665    ///
666    /// Example assumes we have a Record `record` from a VCF with a `GT` `FORMAT` tag.
667    /// See [module documentation](../index.html#example-writing) for how to set up
668    /// VCF, header, and record.
669    ///
670    /// ```
671    /// # use rust_htslib::bcf::{Format, Writer};
672    /// # use rust_htslib::bcf::header::Header;
673    /// # use rust_htslib::bcf::record::GenotypeAllele;
674    /// # let mut header = Header::new();
675    /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
676    /// # header.push_record(header_contig_line.as_bytes());
677    /// # let header_gt_line = r#"##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">"#;
678    /// # header.push_record(header_gt_line.as_bytes());
679    /// # header.push_sample("test_sample".as_bytes());
680    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
681    /// # let mut record = vcf.empty_record();
682    /// let alleles = &[GenotypeAllele::Unphased(1), GenotypeAllele::Unphased(1)];
683    /// record.push_genotypes(alleles);
684    /// assert_eq!("1/1", &format!("{}", record.genotypes().unwrap().get(0)));
685    /// ```
686    pub fn push_genotypes(&mut self, genotypes: &[GenotypeAllele]) -> Result<()> {
687        let encoded: Vec<i32> = genotypes.iter().map(|gt| i32::from(*gt)).collect();
688        self.push_format_integer(b"GT", &encoded)
689    }
690
691    /// Get genotypes as vector of one `Genotype` per sample.
692    ///
693    /// # Example
694    /// Parsing genotype field (`GT` tag) from a VCF record:
695    /// ```
696    /// use crate::rust_htslib::bcf::{Reader, Read};
697    /// let mut vcf = Reader::from_path(&"test/test_string.vcf").expect("Error opening file.");
698    /// let expected = ["./1", "1|1", "0/1", "0|1", "1|.", "1/1"];
699    /// for (rec, exp_gt) in vcf.records().zip(expected.iter()) {
700    ///     let mut rec = rec.expect("Error reading record.");
701    ///     let genotypes = rec.genotypes().expect("Error reading genotypes");
702    ///     assert_eq!(&format!("{}", genotypes.get(0)), exp_gt);
703    /// }
704    /// ```
705    pub fn genotypes(&self) -> Result<Genotypes<'_, Buffer>> {
706        self.genotypes_shared_buffer(Buffer::new())
707    }
708
709    /// Get genotypes as vector of one `Genotype` per sample, using a given shared buffer
710    /// to avoid unnecessary allocations.
711    pub fn genotypes_shared_buffer<'a, B>(&self, buffer: B) -> Result<Genotypes<'a, B>>
712    where
713        B: BorrowMut<Buffer> + Borrow<Buffer> + 'a,
714    {
715        Ok(Genotypes {
716            encoded: self.format_shared_buffer(b"GT", buffer).integer()?,
717        })
718    }
719
720    /// Retrieve data for a `FORMAT` field
721    ///
722    /// # Example
723    /// *Note: some boilerplate for the example is hidden for clarity. See [module documentation](../index.html#example-writing)
724    /// for an example of the setup used here.*
725    ///
726    /// ```rust
727    /// # use rust_htslib::bcf::{Format, Writer};
728    /// # use rust_htslib::bcf::header::Header;
729    /// #
730    /// # // Create minimal VCF header with a single sample
731    /// # let mut header = Header::new();
732    /// header.push_sample(b"sample1").push_sample(b"sample2").push_record(br#"##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">"#);
733    /// #
734    /// # // Write uncompressed VCF to stdout with above header and get an empty record
735    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
736    /// # let mut record = vcf.empty_record();
737    /// record.push_format_integer(b"DP", &[20, 12]).expect("Failed to set DP format field");
738    ///
739    /// let read_depths = record.format(b"DP").integer().expect("Couldn't retrieve DP field");
740    /// let sample1_depth = read_depths[0];
741    /// assert_eq!(sample1_depth, &[20]);
742    /// let sample2_depth = read_depths[1];
743    /// assert_eq!(sample2_depth, &[12])
744    /// ```
745    ///
746    /// # Errors
747    /// **Attention:** the returned [`BufferBacked`] from [`integer()`](Format::integer)
748    /// (`read_depths`), which holds the data, has to be kept in scope as long as the data is
749    /// accessed. If parts of the data are accessed after the `BufferBacked` object is been
750    /// dropped, you will access unallocated memory.
751    pub fn format<'a>(&'a self, tag: &'a [u8]) -> Format<'a, Buffer> {
752        self.format_shared_buffer(tag, Buffer::new())
753    }
754
755    /// Get the value of the given format tag for each sample.
756    pub fn format_shared_buffer<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b>(
757        &'a self,
758        tag: &'a [u8],
759        buffer: B,
760    ) -> Format<'a, B> {
761        Format::new(self, tag, buffer)
762    }
763
764    /// Add/replace an integer-typed FORMAT tag.
765    ///
766    /// # Arguments
767    ///
768    /// - `tag` - The tag's string.
769    /// - `data` - a flattened, two-dimensional array, the first dimension contains one array
770    ///            for each sample.
771    ///
772    /// # Errors
773    ///
774    /// Returns error if tag is not present in header.
775    pub fn push_format_integer(&mut self, tag: &[u8], data: &[i32]) -> Result<()> {
776        self.push_format(tag, data, htslib::BCF_HT_INT)
777    }
778
779    /// Add/replace a float-typed FORMAT tag.
780    ///
781    /// # Arguments
782    ///
783    /// - `tag` - The tag's string.
784    /// - `data` - a flattened, two-dimensional array, the first dimension contains one array
785    ///            for each sample.
786    ///
787    /// # Errors
788    ///
789    /// Returns error if tag is not present in header.
790    ///
791    /// # Example
792    ///
793    /// Example assumes we have a Record `record` from a VCF with an `AF` `FORMAT` tag.
794    /// See [module documentation](../index.html#example-writing) for how to set up
795    /// VCF, header, and record.
796    ///
797    /// ```
798    /// # use rust_htslib::bcf::{Format, Writer};
799    /// # use rust_htslib::bcf::header::Header;
800    /// # use rust_htslib::bcf::record::GenotypeAllele;
801    /// # let mut header = Header::new();
802    /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
803    /// # header.push_record(header_contig_line.as_bytes());
804    /// # let header_af_line = r#"##FORMAT=<ID=AF,Number=1,Type=Float,Description="Frequency">"#;
805    /// # header.push_record(header_af_line.as_bytes());
806    /// # header.push_sample("test_sample".as_bytes());
807    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
808    /// # let mut record = vcf.empty_record();
809    /// record.push_format_float(b"AF", &[0.5]);
810    /// assert_eq!(0.5, record.format(b"AF").float().unwrap()[0][0]);
811    /// ```
812    pub fn push_format_float(&mut self, tag: &[u8], data: &[f32]) -> Result<()> {
813        self.push_format(tag, data, htslib::BCF_HT_REAL)
814    }
815
816    /// Add/replace a single-char-typed FORMAT tag.
817    ///
818    /// # Arguments
819    ///
820    /// - `tag` - The tag's string.
821    /// - `data` - a flattened, two-dimensional array, the first dimension contains one array
822    ///            for each sample.
823    ///
824    /// # Errors
825    ///
826    /// Returns error if tag is not present in header.
827    pub fn push_format_char(&mut self, tag: &[u8], data: &[u8]) -> Result<()> {
828        self.push_format(tag, data, htslib::BCF_HT_STR)
829    }
830
831    /// Add a format tag. Data is a flattened two-dimensional array.
832    /// The first dimension contains one array for each sample.
833    fn push_format<T>(&mut self, tag: &[u8], data: &[T], ht: u32) -> Result<()> {
834        let tag_c_str = ffi::CString::new(tag).unwrap();
835        unsafe {
836            if htslib::bcf_update_format(
837                self.header().inner,
838                self.inner,
839                tag_c_str.as_ptr() as *mut c_char,
840                data.as_ptr() as *const ::std::os::raw::c_void,
841                data.len() as i32,
842                ht as i32,
843            ) == 0
844            {
845                Ok(())
846            } else {
847                Err(Error::BcfSetTag {
848                    tag: str::from_utf8(tag).unwrap().to_owned(),
849                })
850            }
851        }
852    }
853
854    // TODO: should we add convenience methods clear_format_*?
855
856    /// Add a string-typed FORMAT tag. Note that genotypes are treated as a special case
857    /// and cannot be added with this method. See instead [push_genotypes](#method.push_genotypes).
858    ///
859    /// # Arguments
860    ///
861    /// - `tag` - The tag's string.
862    /// - `data` - a two-dimensional array, the first dimension contains one array
863    ///            for each sample. Must be non-empty.
864    ///
865    /// # Errors
866    ///
867    /// Returns error if tag is not present in header.
868    pub fn push_format_string<D: Borrow<[u8]>>(&mut self, tag: &[u8], data: &[D]) -> Result<()> {
869        assert!(
870            !data.is_empty(),
871            "given string data must have at least 1 element"
872        );
873        let c_data = data
874            .iter()
875            .map(|s| ffi::CString::new(s.borrow()).unwrap())
876            .collect::<Vec<ffi::CString>>();
877        let c_ptrs = c_data
878            .iter()
879            .map(|s| s.as_ptr() as *mut i8)
880            .collect::<Vec<*mut i8>>();
881        let tag_c_str = ffi::CString::new(tag).unwrap();
882        unsafe {
883            if htslib::bcf_update_format_string(
884                self.header().inner,
885                self.inner,
886                tag_c_str.as_ptr() as *mut c_char,
887                c_ptrs.as_slice().as_ptr() as *mut *const c_char,
888                data.len() as i32,
889            ) == 0
890            {
891                Ok(())
892            } else {
893                Err(Error::BcfSetTag {
894                    tag: str::from_utf8(tag).unwrap().to_owned(),
895                })
896            }
897        }
898    }
899
    /// Add/replace an integer-typed INFO entry.
    ///
    /// # Arguments
    ///
    /// - `tag` - the INFO tag to add/replace.
    /// - `data` - the integer values to store.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `push_info`.
    pub fn push_info_integer(&mut self, tag: &[u8], data: &[i32]) -> Result<()> {
        self.push_info(tag, data, htslib::BCF_HT_INT)
    }
904
    /// Remove the integer-typed INFO entry.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `push_info`.
    pub fn clear_info_integer(&mut self, tag: &[u8]) -> Result<()> {
        // Removal is requested by writing an empty value slice for the tag.
        self.push_info::<i32>(tag, &[], htslib::BCF_HT_INT)
    }
909
    /// Add/replace a float-typed INFO entry.
    ///
    /// # Arguments
    ///
    /// - `tag` - the INFO tag to add/replace.
    /// - `data` - the float values to store.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `push_info`.
    pub fn push_info_float(&mut self, tag: &[u8], data: &[f32]) -> Result<()> {
        self.push_info(tag, data, htslib::BCF_HT_REAL)
    }
914
915    /// Remove the float-typed INFO entry.
916    pub fn clear_info_float(&mut self, tag: &[u8]) -> Result<()> {
917        self.push_info::<u8>(tag, &[], htslib::BCF_HT_REAL)
918    }
919
920    /// Add/replace an INFO tag.
921    ///
922    /// # Arguments
923    /// * `tag` - the tag to add/replace
924    /// * `data` - the data to set
925    /// * `ht` - the HTSLib type to use
926    fn push_info<T>(&mut self, tag: &[u8], data: &[T], ht: u32) -> Result<()> {
927        let tag_c_str = ffi::CString::new(tag).unwrap();
928        unsafe {
929            if htslib::bcf_update_info(
930                self.header().inner,
931                self.inner,
932                tag_c_str.as_ptr() as *mut c_char,
933                data.as_ptr() as *const ::std::os::raw::c_void,
934                data.len() as i32,
935                ht as i32,
936            ) == 0
937            {
938                Ok(())
939            } else {
940                Err(Error::BcfSetTag {
941                    tag: str::from_utf8(tag).unwrap().to_owned(),
942                })
943            }
944        }
945    }
946
    /// Set flag into the INFO column.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `push_info_string_impl`.
    pub fn push_info_flag(&mut self, tag: &[u8]) -> Result<()> {
        // A single empty string yields a value count of one for the flag type.
        self.push_info_string_impl(tag, &[b""], htslib::BCF_HT_FLAG)
    }
951
    /// Remove the flag from the INFO column.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `push_info_string_impl`.
    pub fn clear_info_flag(&mut self, tag: &[u8]) -> Result<()> {
        // No values at all yields a value count of zero for the flag type.
        self.push_info_string_impl(tag, &[], htslib::BCF_HT_FLAG)
    }
956
    /// Add/replace a string-typed INFO entry.
    ///
    /// # Arguments
    ///
    /// - `tag` - the INFO tag to add/replace.
    /// - `data` - the string values; they are stored joined by commas.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `push_info_string_impl`.
    pub fn push_info_string(&mut self, tag: &[u8], data: &[&[u8]]) -> Result<()> {
        self.push_info_string_impl(tag, data, htslib::BCF_HT_STR)
    }
961
    /// Remove the string field from the INFO column.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `push_info_string_impl`.
    pub fn clear_info_string(&mut self, tag: &[u8]) -> Result<()> {
        // An empty value list produces an empty string, i.e. a length of zero.
        self.push_info_string_impl(tag, &[], htslib::BCF_HT_STR)
    }
966
967    /// Add an string-valued INFO tag.
968    fn push_info_string_impl(&mut self, tag: &[u8], data: &[&[u8]], ht: u32) -> Result<()> {
969        let mut buf: Vec<u8> = Vec::new();
970        for (i, &s) in data.iter().enumerate() {
971            if i > 0 {
972                buf.extend(b",");
973            }
974            buf.extend(s);
975        }
976        let c_str = ffi::CString::new(buf).unwrap();
977        let len = if ht == htslib::BCF_HT_FLAG {
978            data.len()
979        } else {
980            c_str.to_bytes().len()
981        };
982        let tag_c_str = ffi::CString::new(tag).unwrap();
983        unsafe {
984            if htslib::bcf_update_info(
985                self.header().inner,
986                self.inner,
987                tag_c_str.as_ptr() as *mut c_char,
988                c_str.as_ptr() as *const ::std::os::raw::c_void,
989                len as i32,
990                ht as i32,
991            ) == 0
992            {
993                Ok(())
994            } else {
995                Err(Error::BcfSetTag {
996                    tag: str::from_utf8(tag).unwrap().to_owned(),
997                })
998            }
999        }
1000    }
1001
1002    /// Remove unused alleles.
1003    pub fn trim_alleles(&mut self) -> Result<()> {
1004        match unsafe { htslib::bcf_trim_alleles(self.header().inner, self.inner) } {
1005            -1 => Err(Error::BcfRemoveAlleles),
1006            _ => Ok(()),
1007        }
1008    }
1009
1010    pub fn remove_alleles(&mut self, remove: &[bool]) -> Result<()> {
1011        let rm_set = unsafe { htslib::kbs_init(remove.len()) };
1012
1013        for (i, &r) in remove.iter().enumerate() {
1014            if r {
1015                unsafe {
1016                    htslib::kbs_insert(rm_set, i as i32);
1017                }
1018            }
1019        }
1020
1021        let ret = unsafe { htslib::bcf_remove_allele_set(self.header().inner, self.inner, rm_set) };
1022
1023        unsafe {
1024            htslib::kbs_destroy(rm_set);
1025        }
1026
1027        match ret {
1028            -1 => Err(Error::BcfRemoveAlleles),
1029            _ => Ok(()),
1030        }
1031    }
1032
    /// Get the length of the reference allele. If the record has no reference allele, then the
    /// result will be `0`.
    ///
    /// The value is read directly from the `rlen` field of the underlying htslib record.
    ///
    /// # Example
    /// ```rust
    /// # use rust_htslib::bcf::{Format, Writer};
    /// # use rust_htslib::bcf::header::Header;
    /// #
    /// # // Create minimal VCF header with a single sample
    /// # let mut header = Header::new();
    /// # header.push_sample("sample".as_bytes());
    /// #
    /// # // Write uncompressed VCF to stdout with above header and get an empty record
    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
    /// # let mut record = vcf.empty_record();
    /// # assert_eq!(record.rlen(), 0);
    /// let alleles: &[&[u8]] = &[b"AGG", b"TG"];
    /// record.set_alleles(alleles).expect("Failed to set alleles");
    /// assert_eq!(record.rlen(), 3)
    /// ```
    pub fn rlen(&self) -> i64 {
        self.inner().rlen
    }
1056
    /// Clear all parts of the record. Useful if you plan to reuse a record object multiple times.
    ///
    /// NOTE(review): this takes `&self` although `bcf_clear` is handed the record's raw
    /// pointer and resets its contents; confirm that mutation through a shared reference
    /// is intended here.
    ///
    /// # Example
    /// ```rust
    /// # use rust_htslib::bcf::{Format, Writer};
    /// # use rust_htslib::bcf::header::Header;
    /// #
    /// # // Create minimal VCF header with a single sample
    /// # let mut header = Header::new();
    /// # header.push_sample("sample".as_bytes());
    /// #
    /// # // Write uncompressed VCF to stdout with above header and get an empty record
    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
    /// # let mut record = vcf.empty_record();
    /// let alleles: &[&[u8]] = &[b"AGG", b"TG"];
    /// record.set_alleles(alleles).expect("Failed to set alleles");
    /// record.set_pos(6);
    /// record.clear();
    /// assert_eq!(record.rlen(), 0);
    /// assert_eq!(record.pos(), 0)
    /// ```
    pub fn clear(&self) {
        unsafe { htslib::bcf_clear(self.inner) }
    }
1081
1082    /// Provide short description of record for locating it in the BCF/VCF file.
1083    pub fn desc(&self) -> String {
1084        if let Some(rid) = self.rid() {
1085            if let Ok(contig) = self.header.rid2name(rid) {
1086                return format!("{}:{}", str::from_utf8(contig).unwrap(), self.pos());
1087            }
1088        }
1089        "".to_owned()
1090    }
1091
1092    /// Convert to VCF String
1093    ///
1094    /// Intended for debug only. Use Writer for efficient VCF output.
1095    ///
1096    pub fn to_vcf_string(&self) -> Result<String> {
1097        let mut buf = htslib::kstring_t {
1098            l: 0,
1099            m: 0,
1100            s: ptr::null_mut(),
1101        };
1102        let ret = unsafe { htslib::vcf_format(self.header().inner, self.inner, &mut buf) };
1103
1104        if ret < 0 {
1105            if !buf.s.is_null() {
1106                unsafe {
1107                    libc::free(buf.s as *mut libc::c_void);
1108                }
1109            }
1110            return Err(Error::BcfToString);
1111        }
1112
1113        let vcf_str = unsafe {
1114            let vcf_str = String::from(ffi::CStr::from_ptr(buf.s).to_str().unwrap());
1115            if !buf.s.is_null() {
1116                libc::free(buf.s as *mut libc::c_void);
1117            }
1118            vcf_str
1119        };
1120
1121        Ok(vcf_str)
1122    }
1123}
1124
1125impl Clone for Record {
1126    fn clone(&self) -> Self {
1127        let inner = unsafe {
1128            let inner = htslib::bcf_dup(self.inner);
1129            inner
1130        };
1131        Record {
1132            inner,
1133            header: self.header.clone(),
1134        }
1135    }
1136}
1137
1138impl genome::AbstractLocus for Record {
1139    fn contig(&self) -> &str {
1140        str::from_utf8(
1141            self.header()
1142                .rid2name(self.rid().expect("rid not set"))
1143                .expect("unable to find rid in header"),
1144        )
1145        .expect("unable to interpret contig name as UTF-8")
1146    }
1147
1148    fn pos(&self) -> u64 {
1149        self.pos() as u64
1150    }
1151}
1152
/// Phased or unphased alleles, represented as indices.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GenotypeAllele {
    /// Unphased allele with the given index.
    Unphased(i32),
    /// Phased allele with the given index.
    Phased(i32),
    /// Unphased allele whose value is missing.
    UnphasedMissing,
    /// Phased allele whose value is missing.
    PhasedMissing,
}

impl GenotypeAllele {
    /// Decode given integer according to BCF standard.
    ///
    /// Thin wrapper around the `From<i32>` conversion so that both entry points share
    /// a single decoding routine.
    #[deprecated(
        since = "0.36.0",
        note = "Please use the conversion trait From<i32> for GenotypeAllele instead."
    )]
    pub fn from_encoded(encoded: i32) -> Self {
        GenotypeAllele::from(encoded)
    }

    /// Get the index into the list of alleles. `None` for missing alleles.
    pub fn index(self) -> Option<u32> {
        match self {
            GenotypeAllele::Unphased(i) | GenotypeAllele::Phased(i) => Some(i as u32),
            GenotypeAllele::UnphasedMissing | GenotypeAllele::PhasedMissing => None,
        }
    }
}

impl fmt::Display for GenotypeAllele {
    /// Write the allele index, or `.` for a missing allele.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.index() {
            Some(a) => write!(f, "{}", a),
            None => write!(f, "."),
        }
    }
}

impl From<GenotypeAllele> for i32 {
    /// Encode the allele: `(index + 1) << 1`, with the lowest bit set when phased.
    /// Missing alleles use an index of `-1`.
    fn from(allele: GenotypeAllele) -> i32 {
        let (index, phase_bit) = match allele {
            GenotypeAllele::UnphasedMissing => (-1, 0),
            GenotypeAllele::PhasedMissing => (-1, 1),
            GenotypeAllele::Unphased(a) => (a, 0),
            GenotypeAllele::Phased(a) => (a, 1),
        };
        ((index + 1) << 1) | phase_bit
    }
}

impl From<i32> for GenotypeAllele {
    /// Decode the allele: the lowest bit carries the phasing, the remaining bits hold
    /// `index + 1`, so the encoded values `0` and `1` denote missing alleles.
    fn from(encoded: i32) -> GenotypeAllele {
        // The phase bit is a bool, which makes the match exhaustive without a
        // catch-all arm.
        let phased = encoded & 1 == 1;
        match (encoded, phased) {
            (0, _) => GenotypeAllele::UnphasedMissing,
            (1, _) => GenotypeAllele::PhasedMissing,
            (e, true) => GenotypeAllele::Phased((e >> 1) - 1),
            (e, false) => GenotypeAllele::Unphased((e >> 1) - 1),
        }
    }
}
1219
custom_derive! {
    /// Genotype representation as a vector of `GenotypeAllele`.
    ///
    /// A newtype around `Vec<GenotypeAllele>`; the `NewtypeDeref` derive is requested so
    /// the wrapper can be dereferenced to the inner vector.
    #[derive(NewtypeDeref, Debug, Clone, PartialEq, Eq, Hash)]
    pub struct Genotype(Vec<GenotypeAllele>);
}
1225
1226impl fmt::Display for Genotype {
1227    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1228        let &Genotype(ref alleles) = self;
1229        write!(f, "{}", alleles[0])?;
1230        for a in &alleles[1..] {
1231            let sep = match a {
1232                GenotypeAllele::Phased(_) | GenotypeAllele::PhasedMissing => '|',
1233                GenotypeAllele::Unphased(_) | GenotypeAllele::UnphasedMissing => '/',
1234            };
1235            write!(f, "{}{}", sep, a)?;
1236        }
1237        Ok(())
1238    }
1239}
1240
1241/// Lazy representation of genotypes, that does no computation until a particular genotype is queried.
1242#[derive(Debug)]
1243pub struct Genotypes<'a, B>
1244where
1245    B: Borrow<Buffer> + 'a,
1246{
1247    encoded: BufferBacked<'a, Vec<&'a [i32]>, B>,
1248}
1249
1250impl<'a, B: Borrow<Buffer> + 'a> Genotypes<'a, B> {
1251    /// Get genotype of ith sample. So far, only supports diploid genotypes.
1252    ///
1253    /// Note that the result complies with the BCF spec. This means that the
1254    /// first allele will always be marked as `Unphased`. That is, if you have 1|1 in the VCF,
1255    /// this method will return `[Unphased(1), Phased(1)]`.
1256    pub fn get(&self, i: usize) -> Genotype {
1257        let igt = self.encoded[i];
1258        Genotype(igt.iter().map(|&e| GenotypeAllele::from(e)).collect())
1259    }
1260}
1261
impl Drop for Record {
    /// Hand the raw record pointer to htslib's `bcf_destroy` when the `Record` is dropped.
    fn drop(&mut self) {
        unsafe { htslib::bcf_destroy(self.inner) };
    }
}
1267
// Manual assertion that a `Record` may be moved to another thread.
// NOTE(review): soundness depends on how htslib treats the wrapped raw pointer; confirm.
unsafe impl Send for Record {}

// Manual assertion that a `Record` may be shared between threads.
// NOTE(review): `clear` resets the record through `&self`; confirm this is compatible
// with `Sync`.
unsafe impl Sync for Record {}
1271
/// Info tag representation.
///
/// Bundles the record to read from, the tag name and the buffer into which htslib
/// writes the decoded values.
#[derive(Debug)]
pub struct Info<'a, B: BorrowMut<Buffer> + Borrow<Buffer>> {
    /// Record the INFO values are read from.
    record: &'a Record,
    /// Name of the INFO tag.
    tag: &'a [u8],
    /// Buffer receiving the values.
    buffer: B,
}
1279
1280impl<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Info<'a, B> {
1281    /// Short description of info tag.
1282    pub fn desc(&self) -> String {
1283        str::from_utf8(self.tag).unwrap().to_owned()
1284    }
1285
1286    fn data(&mut self, data_type: u32) -> Result<Option<i32>> {
1287        let mut n: i32 = self.buffer.borrow().len;
1288        let c_str = ffi::CString::new(self.tag).unwrap();
1289        let ret = unsafe {
1290            htslib::bcf_get_info_values(
1291                self.record.header().inner,
1292                self.record.inner,
1293                c_str.as_ptr() as *mut c_char,
1294                &mut self.buffer.borrow_mut().inner,
1295                &mut n,
1296                data_type as i32,
1297            )
1298        };
1299        self.buffer.borrow_mut().len = n;
1300
1301        match ret {
1302            -1 => Err(Error::BcfUndefinedTag { tag: self.desc() }),
1303            -2 => Err(Error::BcfUnexpectedType { tag: self.desc() }),
1304            -3 => Ok(None),
1305            ret => Ok(Some(ret)),
1306        }
1307    }
1308
1309    /// Get integers from tag. `None` if tag not present in record.
1310    ///
1311    /// Import `bcf::record::Numeric` for missing value handling.
1312    ///
1313    /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1314    /// as along as the data is accessed. If parts of the data are accessed while
1315    /// the BufferBacked object is already dropped, you will access unallocated
1316    /// memory.
1317    pub fn integer(mut self) -> Result<Option<BufferBacked<'b, &'b [i32], B>>> {
1318        self.data(htslib::BCF_HT_INT).map(|data| {
1319            data.map(|ret| {
1320                let values = unsafe {
1321                    slice::from_raw_parts(self.buffer.borrow().inner as *const i32, ret as usize)
1322                };
1323                BufferBacked::new(&values[..ret as usize], self.buffer)
1324            })
1325        })
1326    }
1327
1328    /// Get floats from tag. `None` if tag not present in record.
1329    ///
1330    /// Import `bcf::record::Numeric` for missing value handling.
1331    ///
1332    /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1333    /// as along as the data is accessed. If parts of the data are accessed while
1334    /// the BufferBacked object is already dropped, you will access unallocated
1335    /// memory.
1336    pub fn float(mut self) -> Result<Option<BufferBacked<'b, &'b [f32], B>>> {
1337        self.data(htslib::BCF_HT_REAL).map(|data| {
1338            data.map(|ret| {
1339                let values = unsafe {
1340                    slice::from_raw_parts(self.buffer.borrow().inner as *const f32, ret as usize)
1341                };
1342                BufferBacked::new(&values[..ret as usize], self.buffer)
1343            })
1344        })
1345    }
1346
1347    /// Get flags from tag. `false` if not set.
1348    pub fn flag(&mut self) -> Result<bool> {
1349        self.data(htslib::BCF_HT_FLAG).map(|data| match data {
1350            Some(ret) => ret == 1,
1351            None => false,
1352        })
1353    }
1354
1355    /// Get strings from tag. `None` if tag not present in record.
1356    ///
1357    /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1358    /// as along as the data is accessed. If parts of the data are accessed while
1359    /// the BufferBacked object is already dropped, you will access unallocated
1360    /// memory.
1361    pub fn string(mut self) -> Result<Option<BufferBacked<'b, Vec<&'b [u8]>, B>>> {
1362        self.data(htslib::BCF_HT_STR).map(|data| {
1363            data.map(|ret| {
1364                BufferBacked::new(
1365                    unsafe {
1366                        slice::from_raw_parts(self.buffer.borrow().inner as *const u8, ret as usize)
1367                    }
1368                    .split(|c| *c == b',')
1369                    .map(|s| {
1370                        // stop at zero character
1371                        s.split(|c| *c == 0u8)
1372                            .next()
1373                            .expect("Bug: returned string should not be empty.")
1374                    })
1375                    .collect(),
1376                    self.buffer,
1377                )
1378            })
1379        })
1380    }
1381}
1382
// Manual assertion that `Info` may be moved to another thread.
// NOTE(review): no bound on `B` restricts this to thread-safe buffers; confirm soundness.
unsafe impl<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Send for Info<'a, B> {}

// Manual assertion that `Info` may be shared between threads.
// NOTE(review): see the note on `Send` above; confirm soundness.
unsafe impl<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Sync for Info<'a, B> {}
1386
1387fn trim_slice<T: PartialEq + NumericUtils>(s: &[T]) -> &[T] {
1388    s.split(|v| v.is_vector_end())
1389        .next()
1390        .expect("Bug: returned slice should not be empty.")
1391}
1392
/// Representation of per-sample data.
///
/// Bundles the record to read from, the tag name, the pointer obtained from htslib's
/// `bcf_get_fmt` for that tag and the buffer into which htslib writes the decoded values.
#[derive(Debug)]
pub struct Format<'a, B: BorrowMut<Buffer> + Borrow<Buffer>> {
    /// Record the FORMAT values are read from.
    record: &'a Record,
    /// Name of the FORMAT tag.
    tag: &'a [u8],
    /// Pointer returned by `bcf_get_fmt` for this tag.
    inner: *mut htslib::bcf_fmt_t,
    /// Buffer receiving the values.
    buffer: B,
}
1401
1402impl<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Format<'a, B> {
1403    /// Create new format data in a given record.
1404    fn new(record: &'a Record, tag: &'a [u8], buffer: B) -> Format<'a, B> {
1405        let c_str = ffi::CString::new(tag).unwrap();
1406        let inner = unsafe {
1407            htslib::bcf_get_fmt(
1408                record.header().inner,
1409                record.inner,
1410                c_str.as_ptr() as *mut c_char,
1411            )
1412        };
1413        Format {
1414            record,
1415            tag,
1416            inner,
1417            buffer,
1418        }
1419    }
1420
1421    /// Provide short description of format entry (just the tag name).
1422    pub fn desc(&self) -> String {
1423        str::from_utf8(self.tag).unwrap().to_owned()
1424    }
1425
1426    pub fn inner(&self) -> &htslib::bcf_fmt_t {
1427        unsafe { &*self.inner }
1428    }
1429
1430    pub fn inner_mut(&mut self) -> &mut htslib::bcf_fmt_t {
1431        unsafe { &mut *self.inner }
1432    }
1433
1434    fn values_per_sample(&self) -> usize {
1435        self.inner().n as usize
1436    }
1437
1438    /// Read and decode format data into a given type.
1439    fn data(&mut self, data_type: u32) -> Result<i32> {
1440        let mut n: i32 = self.buffer.borrow().len;
1441        let c_str = ffi::CString::new(self.tag).unwrap();
1442        let ret = unsafe {
1443            htslib::bcf_get_format_values(
1444                self.record.header().inner,
1445                self.record.inner,
1446                c_str.as_ptr() as *mut c_char,
1447                &mut self.buffer.borrow_mut().inner,
1448                &mut n,
1449                data_type as i32,
1450            )
1451        };
1452        self.buffer.borrow_mut().len = n;
1453        match ret {
1454            -1 => Err(Error::BcfUndefinedTag { tag: self.desc() }),
1455            -2 => Err(Error::BcfUnexpectedType { tag: self.desc() }),
1456            -3 => Err(Error::BcfMissingTag {
1457                tag: self.desc(),
1458                record: self.record.desc(),
1459            }),
1460            ret => Ok(ret),
1461        }
1462    }
1463
1464    /// Get format data as integers.
1465    ///
1466    /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1467    /// as long as the data is accessed. If parts of the data are accessed while
1468    /// the BufferBacked object is already dropped, you will access unallocated
1469    /// memory.
1470    pub fn integer(mut self) -> Result<BufferBacked<'b, Vec<&'b [i32]>, B>> {
1471        self.data(htslib::BCF_HT_INT).map(|ret| {
1472            BufferBacked::new(
1473                unsafe {
1474                    slice::from_raw_parts(
1475                        self.buffer.borrow_mut().inner as *const i32,
1476                        ret as usize,
1477                    )
1478                }
1479                .chunks(self.values_per_sample())
1480                .map(|s| trim_slice(s))
1481                .collect(),
1482                self.buffer,
1483            )
1484        })
1485    }
1486
1487    /// Get format data as floats.
1488    ///
1489    /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1490    /// as along as the data is accessed. If parts of the data are accessed while
1491    /// the BufferBacked object is already dropped, you will access unallocated
1492    /// memory.
1493    pub fn float(mut self) -> Result<BufferBacked<'b, Vec<&'b [f32]>, B>> {
1494        self.data(htslib::BCF_HT_REAL).map(|ret| {
1495            BufferBacked::new(
1496                unsafe {
1497                    slice::from_raw_parts(
1498                        self.buffer.borrow_mut().inner as *const f32,
1499                        ret as usize,
1500                    )
1501                }
1502                .chunks(self.values_per_sample())
1503                .map(|s| trim_slice(s))
1504                .collect(),
1505                self.buffer,
1506            )
1507        })
1508    }
1509
1510    /// Get format data as byte slices. To obtain the values strings, use `std::str::from_utf8`.
1511    ///
1512    /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1513    /// as along as the data is accessed. If parts of the data are accessed while
1514    /// the BufferBacked object is already dropped, you will access unallocated
1515    /// memory.
1516    pub fn string(mut self) -> Result<BufferBacked<'b, Vec<&'b [u8]>, B>> {
1517        self.data(htslib::BCF_HT_STR).map(|ret| {
1518            if ret == 0 {
1519                return BufferBacked::new(Vec::new(), self.buffer);
1520            }
1521            BufferBacked::new(
1522                unsafe {
1523                    slice::from_raw_parts(self.buffer.borrow_mut().inner as *const u8, ret as usize)
1524                }
1525                .chunks(self.values_per_sample())
1526                .map(|s| {
1527                    // stop at zero character
1528                    s.split(|c| *c == 0u8)
1529                        .next()
1530                        .expect("Bug: returned string should not be empty.")
1531                })
1532                .collect(),
1533                self.buffer,
1534            )
1535        })
1536    }
1537}
1538
// Manual assertion that `Format` may be moved to another thread.
// NOTE(review): `Format` stores a raw `bcf_fmt_t` pointer and no bound on `B` restricts
// this to thread-safe buffers; confirm soundness.
unsafe impl<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Send for Format<'a, B> {}

// Manual assertion that `Format` may be shared between threads.
// NOTE(review): see the note on `Send` above; confirm soundness.
unsafe impl<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Sync for Format<'a, B> {}
1542
1543#[derive(Debug)]
1544pub struct Filters<'a> {
1545    /// Reference to the `Record` to enumerate records for.
1546    record: &'a Record,
1547    /// Index of the next filter to return, if not at end.
1548    idx: i32,
1549}
1550
1551impl<'a> Filters<'a> {
1552    pub fn new(record: &'a Record) -> Self {
1553        Filters { record, idx: 0 }
1554    }
1555}
1556
1557impl<'a> Iterator for Filters<'a> {
1558    type Item = Id;
1559
1560    fn next(&mut self) -> Option<Id> {
1561        if self.record.inner().d.n_flt <= self.idx {
1562            None
1563        } else {
1564            let i = self.idx as isize;
1565            self.idx += 1;
1566            Some(Id(unsafe { *self.record.inner().d.flt.offset(i) } as u32))
1567        }
1568    }
1569}
1570
1571#[cfg(test)]
1572mod tests {
1573    use super::*;
1574    use crate::bcf::{Format, Header, Writer};
1575    use tempfile::NamedTempFile;
1576
1577    #[test]
1578    fn test_missing_float() {
1579        let expected: u32 = 0x7F80_0001;
1580        assert_eq!(MISSING_FLOAT.bits(), expected);
1581    }
1582
1583    #[test]
1584    fn test_vector_end_float() {
1585        let expected: u32 = 0x7F80_0002;
1586        assert_eq!(VECTOR_END_FLOAT.bits(), expected);
1587    }
1588
1589    #[test]
1590    fn test_record_rlen() {
1591        let tmp = NamedTempFile::new().unwrap();
1592        let path = tmp.path();
1593        let header = Header::new();
1594        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1595        let mut record = vcf.empty_record();
1596        assert_eq!(record.rlen(), 0);
1597        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1598        record.set_alleles(alleles).expect("Failed to set alleles");
1599        assert_eq!(record.rlen(), 3)
1600    }
1601
1602    #[test]
1603    fn test_record_end() {
1604        let tmp = NamedTempFile::new().unwrap();
1605        let path = tmp.path();
1606        let header = Header::new();
1607        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1608        let mut record = vcf.empty_record();
1609        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1610        record.set_alleles(alleles).expect("Failed to set alleles");
1611        record.set_pos(5);
1612
1613        assert_eq!(record.end(), 8)
1614    }
1615
1616    #[test]
1617    fn test_record_clear() {
1618        let tmp = NamedTempFile::new().unwrap();
1619        let path = tmp.path();
1620        let mut header = Header::new();
1621        header.push_sample("sample".as_bytes());
1622        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1623        let mut record = vcf.empty_record();
1624        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1625        record.set_alleles(alleles).expect("Failed to set alleles");
1626        record.set_pos(6);
1627        record.clear();
1628
1629        assert_eq!(record.rlen(), 0);
1630        assert_eq!(record.sample_count(), 0);
1631        assert_eq!(record.pos(), 0)
1632    }
1633
1634    #[test]
1635    fn test_record_clone() {
1636        let tmp = NamedTempFile::new().unwrap();
1637        let path = tmp.path();
1638        let header = Header::new();
1639        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1640        let mut record = vcf.empty_record();
1641        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1642        record.set_alleles(alleles).expect("Failed to set alleles");
1643        record.set_pos(6);
1644
1645        let mut cloned_record = record.clone();
1646        cloned_record.set_pos(5);
1647
1648        assert_eq!(record.pos(), 6);
1649        assert_eq!(record.allele_count(), 2);
1650        assert_eq!(cloned_record.pos(), 5);
1651        assert_eq!(cloned_record.allele_count(), 2);
1652    }
1653
1654    #[test]
1655    fn test_record_has_filter_pass_is_default() {
1656        let tmp = NamedTempFile::new().unwrap();
1657        let path = tmp.path();
1658        let header = Header::new();
1659        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1660        let record = vcf.empty_record();
1661
1662        assert!(record.has_filter("PASS".as_bytes()));
1663        assert!(record.has_filter(".".as_bytes()));
1664        assert!(record.has_filter(&Id(0)));
1665        assert!(!record.has_filter("foo".as_bytes()));
1666        assert!(!record.has_filter(&Id(2)));
1667    }
1668
1669    #[test]
1670    fn test_record_has_filter_custom() {
1671        let tmp = NamedTempFile::new().unwrap();
1672        let path = tmp.path();
1673        let mut header = Header::new();
1674        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
1675        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1676        let mut record = vcf.empty_record();
1677        record.push_filter("foo".as_bytes()).unwrap();
1678
1679        assert!(record.has_filter("foo".as_bytes()));
1680        assert!(!record.has_filter("PASS".as_bytes()))
1681    }
1682
1683    #[test]
1684    fn test_record_push_filter() {
1685        let tmp = NamedTempFile::new().unwrap();
1686        let path = tmp.path();
1687        let mut header = Header::new();
1688        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
1689        header.push_record(br#"##FILTER=<ID=bar,Description="dranks">"#);
1690        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1691        let mut record = vcf.empty_record();
1692        assert!(record.has_filter("PASS".as_bytes()));
1693        record.push_filter("foo".as_bytes()).unwrap();
1694        let bar = record.header().name_to_id(b"bar").unwrap();
1695        record.push_filter(&bar).unwrap();
1696        assert!(record.has_filter("foo".as_bytes()));
1697        assert!(record.has_filter(&bar));
1698        assert!(!record.has_filter("PASS".as_bytes()));
1699        assert!(record.push_filter("baz".as_bytes()).is_err())
1700    }
1701
1702    #[test]
1703    fn test_record_set_filters() {
1704        let tmp = NamedTempFile::new().unwrap();
1705        let path = tmp.path();
1706        let mut header = Header::new();
1707        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
1708        header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
1709        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1710        let mut record = vcf.empty_record();
1711        assert!(record.has_filter("PASS".as_bytes()));
1712        record
1713            .set_filters(&["foo".as_bytes(), "bar".as_bytes()])
1714            .unwrap();
1715        assert!(record.has_filter("foo".as_bytes()));
1716        assert!(record.has_filter("bar".as_bytes()));
1717        assert!(!record.has_filter("PASS".as_bytes()));
1718        let filters: &[&Id] = &[];
1719        record.set_filters(filters).unwrap();
1720        assert!(record.has_filter("PASS".as_bytes()));
1721        assert!(!record.has_filter("foo".as_bytes()));
1722        assert!(record
1723            .set_filters(&["foo".as_bytes(), "baz".as_bytes()])
1724            .is_err())
1725    }
1726
1727    #[test]
1728    fn test_record_remove_filter() {
1729        let tmp = NamedTempFile::new().unwrap();
1730        let path = tmp.path();
1731        let mut header = Header::new();
1732        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
1733        header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
1734        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1735        let mut record = vcf.empty_record();
1736        let foo = record.header().name_to_id(b"foo").unwrap();
1737        let bar = record.header().name_to_id(b"bar").unwrap();
1738        record.set_filters(&[&foo, &bar]).unwrap();
1739        assert!(record.has_filter(&foo));
1740        assert!(record.has_filter(&bar));
1741        record.remove_filter(&foo, true).unwrap();
1742        assert!(!record.has_filter(&foo));
1743        assert!(record.has_filter(&bar));
1744        assert!(record.remove_filter("baz".as_bytes(), true).is_err());
1745        record.remove_filter(&bar, true).unwrap();
1746        assert!(!record.has_filter(&bar));
1747        assert!(record.has_filter("PASS".as_bytes()));
1748    }
1749
1750    #[test]
1751    fn test_record_to_vcf_string_err() {
1752        let tmp = NamedTempFile::new().unwrap();
1753        let path = tmp.path();
1754        let header = Header::new();
1755        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1756        let record = vcf.empty_record();
1757        assert!(record.to_vcf_string().is_err());
1758    }
1759
1760    #[test]
1761    fn test_record_to_vcf_string() {
1762        let tmp = NamedTempFile::new().unwrap();
1763        let path = tmp.path();
1764        let mut header = Header::new();
1765        header.push_record(b"##contig=<ID=chr1,length=1000>");
1766        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
1767        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1768        let mut record = vcf.empty_record();
1769        record.push_filter("foo".as_bytes()).unwrap();
1770        assert_eq!(
1771            record.to_vcf_string().unwrap(),
1772            "chr1\t1\t.\t.\t.\t0\tfoo\t.\n"
1773        );
1774    }
1775}