rust_htslib/bam/
header.rs1use crate::bam::HeaderView;
7use lazy_static::lazy_static;
8use linear_map::LinearMap;
9use regex::Regex;
10use std::borrow::Cow;
11use std::collections::HashMap;
12
/// An in-memory SAM/BAM header that is assembled from raw text records.
///
/// The header is kept as a list of byte buffers. A buffer is either a single
/// header line added through `push_record` / `push_comment`, or the complete
/// (possibly multi-line) header text copied by `from_template`. `to_bytes`
/// joins the buffers with `\n` to produce the final header text.
#[derive(Clone, Debug)]
pub struct Header {
    /// Raw header text chunks, in the order they were added.
    records: Vec<Vec<u8>>,
}
18
19impl Default for Header {
20 fn default() -> Self {
21 Self::new()
22 }
23}
24
25impl Header {
26 pub fn new() -> Self {
28 Header {
29 records: Vec::new(),
30 }
31 }
32
33 pub fn from_template(header: &HeaderView) -> Self {
34 let mut record = header.as_bytes().to_owned();
35 while let Some(&last_char) = record.last() {
40 if last_char == b'\n' {
41 record.pop();
42 } else {
43 break;
44 }
45 }
46 Header {
47 records: vec![record],
48 }
49 }
50
51 pub fn push_record(&mut self, record: &HeaderRecord<'_>) -> &mut Self {
53 self.records.push(record.to_bytes());
54 self
55 }
56
57 pub fn push_comment(&mut self, comment: &[u8]) -> &mut Self {
59 self.records.push([&b"@CO"[..], comment].join(&b'\t'));
60 self
61 }
62
63 pub fn to_bytes(&self) -> Vec<u8> {
64 self.records.join(&b'\n')
65 }
66
67 pub fn to_hashmap(&self) -> HashMap<String, Vec<LinearMap<String, String>>> {
71 let mut header_map = HashMap::default();
72
73 lazy_static! {
74 static ref REC_TYPE_RE: Regex = Regex::new(r"@([A-Z][A-Z])").unwrap();
75 static ref TAG_RE: Regex = Regex::new(r"([A-Za-z][A-Za-z0-9]):([ -~]*)").unwrap();
76 }
77
78 let header_string = String::from_utf8(self.to_bytes()).unwrap();
79
80 for line in header_string.split('\n').filter(|x| !x.is_empty()) {
81 let parts: Vec<_> = line.split('\t').filter(|x| !x.is_empty()).collect();
82 let record_type = REC_TYPE_RE
84 .captures(parts[0])
85 .unwrap()
86 .get(1)
87 .unwrap()
88 .as_str()
89 .to_owned();
90 if record_type.eq("CO") {
91 continue;
92 }
93 let mut field = LinearMap::default();
94 for part in parts.iter().skip(1) {
95 let cap = TAG_RE.captures(part).unwrap();
96 let tag = cap.get(1).unwrap().as_str().to_owned();
97 let value = cap.get(2).unwrap().as_str().to_owned();
98 field.insert(tag, value);
99 }
100 header_map
101 .entry(record_type)
102 .or_insert_with(Vec::new)
103 .push(field);
104 }
105 header_map
106 }
107
108 pub fn comments(&self) -> impl Iterator<Item = Cow<str>> {
110 self.records.iter().flat_map(|r| {
111 r.split(|x| x == &b'\n')
112 .filter(|x| x.starts_with(b"@CO\t"))
113 .map(|x| String::from_utf8_lossy(&x[4..]))
114 })
115 }
116}
117
/// A single header line, built from a record type plus `TAG:VALUE` fields.
///
/// Tag names are borrowed for `'a`; the record type and the tag values are
/// stored as owned bytes.
#[derive(Clone, Debug)]
pub struct HeaderRecord<'a> {
    /// Record type including the leading `@`.
    rec_type: Vec<u8>,
    /// `(tag, value)` pairs in the order they were pushed.
    tags: Vec<(&'a [u8], Vec<u8>)>,
}

impl<'a> HeaderRecord<'a> {
    /// Creates a record of the given type with no tags.
    ///
    /// `rec_type` is passed without the leading `@` (e.g. `b"HD"`); the `@`
    /// is prepended here.
    pub fn new(rec_type: &'a [u8]) -> Self {
        let mut prefixed = Vec::with_capacity(rec_type.len() + 1);
        prefixed.push(b'@');
        prefixed.extend_from_slice(rec_type);
        Self {
            rec_type: prefixed,
            tags: Vec::new(),
        }
    }

    /// Appends a tag to the record and returns `self` for chaining.
    ///
    /// `value` is converted with `ToString`, so numbers, string slices and
    /// owned strings (by value or by reference) are all accepted.
    pub fn push_tag<V: ToString>(&mut self, tag: &'a [u8], value: V) -> &mut Self {
        let rendered = value.to_string();
        self.tags.push((tag, rendered.into_bytes()));
        self
    }

    /// Serializes the record as `@TYPE` followed by one `\tTAG:VALUE` per
    /// tag, without a trailing newline.
    fn to_bytes(&self) -> Vec<u8> {
        let mut line = self.rec_type.clone();
        for (tag, value) in &self.tags {
            line.push(b'\t');
            line.extend_from_slice(tag);
            line.push(b':');
            line.extend_from_slice(value);
        }
        line
    }
}
159
#[cfg(test)]
mod tests {
    use super::HeaderRecord;

    /// `push_tag` accepts any `ToString` value, passed by value or by
    /// reference, and serializes the tags in insertion order.
    #[test]
    fn test_push_tag() {
        let text = String::from("x");
        let expected: &[u8] = b"@HD\tX1:0\tX2:0\tX3:x\tX4:x\tX5:x";

        let mut rec = HeaderRecord::new(b"HD");
        // Integers, owned and borrowed.
        rec.push_tag(b"X1", 0);
        rec.push_tag(b"X2", &0);
        // Strings as `&str`, `&String` and finally the moved `String`.
        rec.push_tag(b"X3", text.as_str());
        rec.push_tag(b"X4", &text);
        rec.push_tag(b"X5", text);

        assert_eq!(rec.to_bytes(), expected);
    }
}