1use std::ffi;
36use std::os::raw::c_char;
37use std::rc::Rc;
38use std::slice;
39use std::str;
40
41use crate::htslib;
42
43use linear_map::LinearMap;
44
45use crate::errors::{Error, Result};
46
47pub type SampleSubset = Vec<i32>;
48
49custom_derive! {
50 #[derive(
52 NewtypeFrom,
53 NewtypeDeref,
54 PartialEq,
55 PartialOrd,
56 Eq,
57 Ord,
58 Copy,
59 Clone,
60 Debug
61 )]
62 pub struct Id(pub u32);
63}
64
65#[derive(Debug)]
67pub struct Header {
68 pub inner: *mut htslib::bcf_hdr_t,
69 pub subset: Option<SampleSubset>,
70}
71
72impl Default for Header {
73 fn default() -> Self {
74 Self::new()
75 }
76}
77
78impl Header {
79 pub fn new() -> Self {
81 let c_str = ffi::CString::new(&b"w"[..]).unwrap();
82 Header {
83 inner: unsafe { htslib::bcf_hdr_init(c_str.as_ptr()) },
84 subset: None,
85 }
86 }
87
88 pub fn from_template(header: &HeaderView) -> Self {
96 Header {
97 inner: unsafe { htslib::bcf_hdr_dup(header.inner) },
98 subset: None,
99 }
100 }
101
102 pub fn from_template_subset(header: &HeaderView, samples: &[&[u8]]) -> Result<Self> {
110 let mut imap = vec![0; samples.len()];
111 let names: Vec<_> = samples
112 .iter()
113 .map(|&s| ffi::CString::new(s).unwrap())
114 .collect();
115 let name_pointers: Vec<_> = names.iter().map(|s| s.as_ptr() as *mut i8).collect();
116 let inner = unsafe {
117 htslib::bcf_hdr_subset(
118 header.inner,
119 samples.len() as i32,
120 name_pointers.as_ptr() as *const *mut c_char,
121 imap.as_mut_ptr() as *mut i32,
122 )
123 };
124 if inner.is_null() {
125 Err(Error::BcfDuplicateSampleNames)
126 } else {
127 Ok(Header {
128 inner,
129 subset: Some(imap),
130 })
131 }
132 }
133
134 pub fn push_sample(&mut self, sample: &[u8]) -> &mut Self {
140 let c_str = ffi::CString::new(sample).unwrap();
141 unsafe { htslib::bcf_hdr_add_sample(self.inner, c_str.as_ptr()) };
142 self
143 }
144
145 pub fn push_record(&mut self, record: &[u8]) -> &mut Self {
157 let c_str = ffi::CString::new(record).unwrap();
158 unsafe { htslib::bcf_hdr_append(self.inner, c_str.as_ptr()) };
159 self
160 }
161
162 pub fn remove_filter(&mut self, tag: &[u8]) -> &mut Self {
168 self.remove_impl(tag, htslib::BCF_HL_FLT)
169 }
170
171 pub fn remove_info(&mut self, tag: &[u8]) -> &mut Self {
177 self.remove_impl(tag, htslib::BCF_HL_INFO)
178 }
179
180 pub fn remove_format(&mut self, tag: &[u8]) -> &mut Self {
186 self.remove_impl(tag, htslib::BCF_HL_FMT)
187 }
188
189 pub fn remove_contig(&mut self, tag: &[u8]) -> &mut Self {
195 self.remove_impl(tag, htslib::BCF_HL_CTG)
196 }
197
198 pub fn remove_structured(&mut self, tag: &[u8]) -> &mut Self {
204 self.remove_impl(tag, htslib::BCF_HL_STR)
205 }
206
207 pub fn remove_generic(&mut self, tag: &[u8]) -> &mut Self {
213 self.remove_impl(tag, htslib::BCF_HL_GEN)
214 }
215
216 fn remove_impl(&mut self, tag: &[u8], type_: u32) -> &mut Self {
218 unsafe {
219 let v = tag.to_vec();
220 let c_str = ffi::CString::new(v).unwrap();
221 htslib::bcf_hdr_remove(self.inner, type_ as i32, c_str.as_ptr());
222 }
223 self
224 }
225}
226
227impl Drop for Header {
228 fn drop(&mut self) {
229 unsafe { htslib::bcf_hdr_destroy(self.inner) };
230 }
231}
232
233#[derive(Debug)]
235pub enum HeaderRecord {
236 Filter {
238 key: String,
239 values: LinearMap<String, String>,
240 },
241 Info {
243 key: String,
244 values: LinearMap<String, String>,
245 },
246 Format {
248 key: String,
249 values: LinearMap<String, String>,
250 },
251 Contig {
253 key: String,
254 values: LinearMap<String, String>,
255 },
256 Structured {
258 key: String,
259 values: LinearMap<String, String>,
260 },
261 Generic { key: String, value: String },
263}
264
265#[derive(Debug)]
266pub struct HeaderView {
267 pub inner: *mut htslib::bcf_hdr_t,
268}
269
270impl HeaderView {
271 pub fn new(inner: *mut htslib::bcf_hdr_t) -> Self {
272 HeaderView { inner }
273 }
274
275 #[inline]
276 fn inner(&self) -> htslib::bcf_hdr_t {
277 unsafe { *self.inner }
278 }
279
280 pub fn sample_count(&self) -> u32 {
282 self.inner().n[htslib::BCF_DT_SAMPLE as usize] as u32
283 }
284
285 pub fn samples(&self) -> Vec<&[u8]> {
287 let names =
288 unsafe { slice::from_raw_parts(self.inner().samples, self.sample_count() as usize) };
289 names
290 .iter()
291 .map(|name| unsafe { ffi::CStr::from_ptr(*name).to_bytes() })
292 .collect()
293 }
294
295 pub fn sample_id(&self, sample: &[u8]) -> Option<usize> {
298 self.samples().iter().position(|s| *s == sample)
299 }
300
301 pub fn contig_count(&self) -> u32 {
303 self.inner().n[htslib::BCF_DT_CTG as usize] as u32
304 }
305
306 pub fn rid2name(&self, rid: u32) -> Result<&[u8]> {
307 if rid <= self.contig_count() {
308 unsafe {
309 let dict = self.inner().id[htslib::BCF_DT_CTG as usize];
310 let ptr = (*dict.offset(rid as isize)).key;
311 Ok(ffi::CStr::from_ptr(ptr).to_bytes())
312 }
313 } else {
314 Err(Error::BcfUnknownRID { rid })
315 }
316 }
317
318 pub fn name2rid(&self, name: &[u8]) -> Result<u32> {
338 let c_str = ffi::CString::new(name).unwrap();
339 unsafe {
340 match htslib::bcf_hdr_id2int(
341 self.inner,
342 htslib::BCF_DT_CTG as i32,
343 c_str.as_ptr() as *mut c_char,
344 ) {
345 -1 => Err(Error::BcfUnknownContig {
346 contig: str::from_utf8(name).unwrap().to_owned(),
347 }),
348 i => Ok(i as u32),
349 }
350 }
351 }
352
353 pub fn info_type(&self, tag: &[u8]) -> Result<(TagType, TagLength)> {
354 self.tag_type(tag, htslib::BCF_HL_INFO)
355 }
356
357 pub fn format_type(&self, tag: &[u8]) -> Result<(TagType, TagLength)> {
358 self.tag_type(tag, htslib::BCF_HL_FMT)
359 }
360
361 fn tag_type(&self, tag: &[u8], hdr_type: ::libc::c_uint) -> Result<(TagType, TagLength)> {
362 let tag_desc = || str::from_utf8(tag).unwrap().to_owned();
363 let c_str_tag = ffi::CString::new(tag).unwrap();
364 let (_type, length, num_values) = unsafe {
365 let id = htslib::bcf_hdr_id2int(
366 self.inner,
367 htslib::BCF_DT_ID as i32,
368 c_str_tag.as_ptr() as *mut c_char,
369 );
370 if id < 0 {
371 return Err(Error::BcfUndefinedTag { tag: tag_desc() });
372 }
373 let n = (*self.inner).n[htslib::BCF_DT_ID as usize] as usize;
374 let entry = slice::from_raw_parts((*self.inner).id[htslib::BCF_DT_ID as usize], n);
375 let d = (*entry[id as usize].val).info[hdr_type as usize];
376 (d >> 4 & 0xf, d >> 8 & 0xf, d >> 12)
377 };
378 let _type = match _type as ::libc::c_uint {
379 htslib::BCF_HT_FLAG => TagType::Flag,
380 htslib::BCF_HT_INT => TagType::Integer,
381 htslib::BCF_HT_REAL => TagType::Float,
382 htslib::BCF_HT_STR => TagType::String,
383 _ => return Err(Error::BcfUnexpectedType { tag: tag_desc() }),
384 };
385 let length = match length as ::libc::c_uint {
386 htslib::BCF_VL_FIXED => TagLength::Fixed(num_values as u32),
388 htslib::BCF_VL_VAR => TagLength::Variable,
389 htslib::BCF_VL_A => TagLength::AltAlleles,
390 htslib::BCF_VL_R => TagLength::Alleles,
391 htslib::BCF_VL_G => TagLength::Genotypes,
392 _ => return Err(Error::BcfUnexpectedType { tag: tag_desc() }),
393 };
394
395 Ok((_type, length))
396 }
397
398 pub fn name_to_id(&self, id: &[u8]) -> Result<Id> {
400 let c_str = ffi::CString::new(id).unwrap();
401 unsafe {
402 match htslib::bcf_hdr_id2int(
403 self.inner,
404 htslib::BCF_DT_ID as i32,
405 c_str.as_ptr() as *const c_char,
406 ) {
407 -1 => Err(Error::BcfUnknownID {
408 id: str::from_utf8(id).unwrap().to_owned(),
409 }),
410 i => Ok(Id(i as u32)),
411 }
412 }
413 }
414
415 pub fn id_to_name(&self, id: Id) -> Vec<u8> {
418 let key = unsafe {
419 ffi::CStr::from_ptr(
420 (*(*self.inner).id[htslib::BCF_DT_ID as usize].offset(*id as isize)).key,
421 )
422 };
423 key.to_bytes().to_vec()
424 }
425
426 pub fn sample_to_id(&self, id: &[u8]) -> Result<Id> {
428 let c_str = ffi::CString::new(id).unwrap();
429 unsafe {
430 match htslib::bcf_hdr_id2int(
431 self.inner,
432 htslib::BCF_DT_SAMPLE as i32,
433 c_str.as_ptr() as *const c_char,
434 ) {
435 -1 => Err(Error::BcfUnknownSample {
436 name: str::from_utf8(id).unwrap().to_owned(),
437 }),
438 i => Ok(Id(i as u32)),
439 }
440 }
441 }
442
443 pub fn id_to_sample(&self, id: Id) -> Vec<u8> {
445 let key = unsafe {
446 ffi::CStr::from_ptr(
447 (*(*self.inner).id[htslib::BCF_DT_SAMPLE as usize].offset(*id as isize)).key,
448 )
449 };
450 key.to_bytes().to_vec()
451 }
452
453 pub fn header_records(&self) -> Vec<HeaderRecord> {
455 fn parse_kv(rec: &htslib::bcf_hrec_t) -> LinearMap<String, String> {
456 let mut result: LinearMap<String, String> = LinearMap::new();
457 for i in 0_i32..(rec.nkeys) {
458 let key = unsafe {
459 ffi::CStr::from_ptr(*rec.keys.offset(i as isize))
460 .to_str()
461 .unwrap()
462 .to_string()
463 };
464 let value = unsafe {
465 ffi::CStr::from_ptr(*rec.vals.offset(i as isize))
466 .to_str()
467 .unwrap()
468 .to_string()
469 };
470 result.insert(key, value);
471 }
472 result
473 }
474
475 let mut result: Vec<HeaderRecord> = Vec::new();
476 for i in 0_i32..unsafe { (*self.inner).nhrec } {
477 let rec = unsafe { &(**(*self.inner).hrec.offset(i as isize)) };
478 let key = unsafe { ffi::CStr::from_ptr(rec.key).to_str().unwrap().to_string() };
479 let record = match rec.type_ {
480 0 => HeaderRecord::Filter {
481 key,
482 values: parse_kv(rec),
483 },
484 1 => HeaderRecord::Info {
485 key,
486 values: parse_kv(rec),
487 },
488 2 => HeaderRecord::Format {
489 key,
490 values: parse_kv(rec),
491 },
492 3 => HeaderRecord::Contig {
493 key,
494 values: parse_kv(rec),
495 },
496 4 => HeaderRecord::Structured {
497 key,
498 values: parse_kv(rec),
499 },
500 5 => HeaderRecord::Generic {
501 key,
502 value: unsafe { ffi::CStr::from_ptr(rec.value).to_str().unwrap().to_string() },
503 },
504 _ => panic!("Unknown type: {}", rec.type_),
505 };
506 result.push(record);
507 }
508 result
509 }
510
511 pub fn empty_record(&self) -> crate::bcf::Record {
515 crate::bcf::Record::new(Rc::new(self.clone()))
516 }
517}
518
519impl Clone for HeaderView {
520 fn clone(&self) -> Self {
521 HeaderView {
522 inner: unsafe { htslib::bcf_hdr_dup(self.inner) },
523 }
524 }
525}
526
527impl Drop for HeaderView {
528 fn drop(&mut self) {
529 unsafe {
530 htslib::bcf_hdr_destroy(self.inner);
531 }
532 }
533}
534
/// Value type of an INFO or FORMAT tag, as decoded by
/// `HeaderView::info_type` / `HeaderView::format_type`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum TagType {
    /// Decoded from `BCF_HT_FLAG`.
    Flag,
    /// Decoded from `BCF_HT_INT`.
    Integer,
    /// Decoded from `BCF_HT_REAL`.
    Float,
    /// Decoded from `BCF_HT_STR`.
    String,
}
542
/// Declared number of values of an INFO or FORMAT tag, as decoded by
/// `HeaderView::info_type` / `HeaderView::format_type`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum TagLength {
    /// Decoded from `BCF_VL_FIXED`; carries the fixed number of values.
    Fixed(u32),
    /// Decoded from `BCF_VL_A`.
    AltAlleles,
    /// Decoded from `BCF_VL_R`.
    Alleles,
    /// Decoded from `BCF_VL_G`.
    Genotypes,
    /// Decoded from `BCF_VL_VAR`.
    Variable,
}
551
#[cfg(test)]
mod tests {
    use super::*;
    use crate::bcf::Reader;

    /// A record created from a header view starts out with rid `Some(0)`,
    /// position 0 and quality 0.0.
    #[test]
    fn test_header_view_empty_record() {
        let reader = Reader::from_path("test/test_string.vcf").expect("Error opening file");
        let view = reader.header.clone();

        let empty = view.empty_record();
        eprintln!("{:?}", empty.rid());

        assert_eq!(empty.rid(), Some(0));
        assert_eq!(empty.pos(), 0);
        assert_eq!(empty.qual(), 0.0);
    }
}