1mod string_map;
4
5use std::str::{FromStr, Lines};
6
7pub use self::string_map::StringMap;
8use crate::{
9 header::{
10 parser::{parse_record, Entry},
11 FileFormat, ParseError, Record,
12 },
13 Header,
14};
15
/// An indexed map of the IDs declared by FILTER, FORMAT, and INFO header records.
pub type StringStringMap = StringMap;
18
/// An indexed map of the IDs declared by contig header records.
pub type ContigStringMap = StringMap;
21
/// A pair of indexed string maps built from a header.
///
/// One map holds the IDs of FILTER, FORMAT, and INFO records; the other holds the IDs of
/// contig records.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct StringMaps {
    // IDs of FILTER, FORMAT, and INFO records. `Default` seeds this map with "PASS".
    string_string_map: StringStringMap,
    // IDs of contig records.
    contig_string_map: ContigStringMap,
}
30
31impl StringMaps {
32 pub fn strings(&self) -> &StringStringMap {
65 &self.string_string_map
66 }
67
68 fn strings_mut(&mut self) -> &mut StringStringMap {
69 &mut self.string_string_map
70 }
71
72 pub fn contigs(&self) -> &ContigStringMap {
101 &self.contig_string_map
102 }
103
104 fn contigs_mut(&mut self) -> &mut ContigStringMap {
105 &mut self.contig_string_map
106 }
107
108 #[doc(hidden)]
109 pub fn insert_entry(&mut self, entry: &Entry<'_>) -> Result<(), ParseError> {
110 match entry {
111 Entry::Contig(id, contig) => insert(self.contigs_mut(), id, contig.idx()),
112 Entry::Filter(id, filter) => insert(self.strings_mut(), id, filter.idx()),
113 Entry::Format(id, format) => insert(self.strings_mut(), id, format.idx()),
114 Entry::Info(id, info) => insert(self.strings_mut(), id, info.idx()),
115 _ => Ok(()),
116 }
117 }
118}
119
120impl Default for StringMaps {
121 fn default() -> Self {
122 let mut string_string_map = StringMap::default();
125 string_string_map.insert(String::from("PASS"));
126
127 let contig_string_map = StringMap::default();
128
129 Self {
130 string_string_map,
131 contig_string_map,
132 }
133 }
134}
135
136impl FromStr for StringMaps {
137 type Err = ParseError;
138
139 fn from_str(s: &str) -> Result<Self, Self::Err> {
140 let mut string_maps = Self::default();
141
142 let mut lines = s.lines();
143 let file_format = parse_file_format(&mut lines)?;
144
145 for line in &mut lines {
146 if line.starts_with("#CHROM") {
147 break;
148 }
149
150 let record =
151 parse_record(line.as_bytes(), file_format).map_err(ParseError::InvalidRecord)?;
152
153 match record {
154 Record::Contig(id, contig) => {
155 insert(string_maps.contigs_mut(), id.as_ref(), contig.idx())?;
156 }
157 Record::Filter(id, filter) => {
158 insert(string_maps.strings_mut(), &id, filter.idx())?;
159 }
160 Record::Format(id, format) => {
161 insert(string_maps.strings_mut(), id.as_ref(), format.idx())?;
162 }
163 Record::Info(id, info) => {
164 insert(string_maps.strings_mut(), id.as_ref(), info.idx())?;
165 }
166 _ => {}
167 }
168 }
169
170 Ok(string_maps)
171 }
172}
173
174fn parse_file_format(lines: &mut Lines<'_>) -> Result<FileFormat, ParseError> {
175 let record = lines
176 .next()
177 .ok_or(ParseError::MissingFileFormat)
178 .and_then(|line| {
179 parse_record(line.as_bytes(), Default::default()).map_err(ParseError::InvalidRecord)
180 })?;
181
182 match record {
183 Record::FileFormat(file_format) => Ok(file_format),
184 _ => Err(ParseError::MissingFileFormat),
185 }
186}
187
188fn insert(string_map: &mut StringMap, id: &str, idx: Option<usize>) -> Result<(), ParseError> {
189 if let Some(i) = idx {
190 if let Some((j, entry)) = string_map.get_full(id) {
191 let actual = (i, id.into());
192 let expected = (j, entry.into());
193
194 if actual != expected {
195 return Err(ParseError::StringMapPositionMismatch(actual, expected));
196 }
197 } else {
198 string_map.insert_at(i, id.into());
199 }
200 } else {
201 string_map.insert(id.into());
202 }
203
204 Ok(())
205}
206
207impl TryFrom<&Header> for StringMaps {
208 type Error = ParseError;
209
210 fn try_from(header: &Header) -> Result<Self, Self::Error> {
211 let mut string_maps = StringMaps::default();
212
213 for (id, contig) in header.contigs() {
214 insert(string_maps.contigs_mut(), id.as_ref(), contig.idx())?;
215 }
216
217 for (id, info) in header.infos() {
218 insert(string_maps.strings_mut(), id.as_ref(), info.idx())?;
219 }
220
221 for (id, filter) in header.filters() {
222 insert(string_maps.strings_mut(), id, filter.idx())?;
223 }
224
225 for (id, format) in header.formats() {
226 insert(string_maps.strings_mut(), id.as_ref(), format.idx())?;
227 }
228
229 Ok(string_maps)
230 }
231}
232
#[cfg(test)]
mod tests {
    use super::*;

    // The default value holds only "PASS" in the string map and has an empty contig map.
    #[test]
    fn test_default() {
        let actual = StringMaps::default();

        let mut string_string_map = StringMap::default();
        string_string_map.insert("PASS".into());

        let contig_string_map = StringMap::default();

        let expected = StringMaps {
            string_string_map,
            contig_string_map,
        };

        assert_eq!(actual, expected);
    }

    // A header in which every FILTER, FORMAT, and INFO record carries an explicit IDX and
    // the contig records carry none.
    #[test]
    fn test_from_str() {
        let s = r#"##fileformat=VCFv4.3
##fileDate=20210412
##contig=<ID=sq0,length=8>
##contig=<ID=sq1,length=13>
##contig=<ID=sq2,length=21>
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data",IDX=1>
##INFO=<ID=DP,Number=1,Type=Integer,Description="Combined depth across samples",IDX=2>
##FILTER=<ID=PASS,Description="All filters passed",IDX=0>
##FILTER=<ID=q10,Description="Quality below 10",IDX=3>
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype",IDX=4>
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth",IDX=2>
##ALT=<ID=DEL,Description="Deletion">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample0
"#;

        // "DP" is declared by both an INFO and a FORMAT record with the same IDX, so it
        // is expected only once. The fileDate and ALT records contribute nothing.
        let string_string_map = StringMap {
            indices: [
                (String::from("PASS"), 0),
                (String::from("NS"), 1),
                (String::from("DP"), 2),
                (String::from("q10"), 3),
                (String::from("GT"), 4),
            ]
            .into_iter()
            .collect(),
            entries: vec![
                Some(String::from("PASS")),
                Some(String::from("NS")),
                Some(String::from("DP")),
                Some(String::from("q10")),
                Some(String::from("GT")),
            ],
        };

        // Contigs are expected at consecutive positions in declaration order.
        let contig_string_map = StringMap {
            indices: [
                (String::from("sq0"), 0),
                (String::from("sq1"), 1),
                (String::from("sq2"), 2),
            ]
            .into_iter()
            .collect(),
            entries: vec![
                Some(String::from("sq0")),
                Some(String::from("sq1")),
                Some(String::from("sq2")),
            ],
        };

        let expected = StringMaps {
            string_string_map,
            contig_string_map,
        };

        assert_eq!(s.parse(), Ok(expected));
    }

    // A VCFv4.2 header with a single FORMAT record that has no IDX.
    // NOTE(review): presumably this exercises parsing with the file format taken from the
    // first line rather than the default one; confirm against `parse_record`.
    #[test]
    fn test_from_str_with_file_format() {
        let s = r#"##fileformat=VCFv4.2
##FORMAT=<ID=MQ,Number=1,Type=Integer,Description="RMS mapping quality">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample0
"#;

        let mut string_string_map = StringMap::default();
        string_string_map.insert(String::from("PASS"));
        string_string_map.insert(String::from("MQ"));

        let contig_string_map = StringMap::default();

        let expected = StringMaps {
            string_string_map,
            contig_string_map,
        };

        assert_eq!(s.parse(), Ok(expected));
    }

    // A header that mixes records with and without an explicit IDX.
    #[test]
    fn test_from_str_with_mixed_positions() {
        let s = r#"##fileformat=VCFv4.3
##fileDate=20210412
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data",IDX=1>
##FILTER=<ID=PASS,Description="All filters passed",IDX=0>
##FILTER=<ID=q10,Description="Quality below 10",IDX=3>
##FILTER=<ID=q15,Description="Quality below 15",IDX=4>
##FILTER=<ID=q20,Description="Quality below 20">
##FILTER=<ID=NS,Description="">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample0
"#;

        // No record claims position 2, so it is expected to stay empty. "q20" has no IDX
        // and is expected after the last entry. The FILTER "NS" has no IDX and reuses the
        // entry created by the INFO "NS".
        let string_string_map = StringMap {
            indices: [
                (String::from("PASS"), 0),
                (String::from("NS"), 1),
                (String::from("q10"), 3),
                (String::from("q15"), 4),
                (String::from("q20"), 5),
            ]
            .into_iter()
            .collect(),
            entries: vec![
                Some(String::from("PASS")),
                Some(String::from("NS")),
                None,
                Some(String::from("q10")),
                Some(String::from("q15")),
                Some(String::from("q20")),
            ],
        };

        let contig_string_map = StringMap::default();

        let expected = StringMaps {
            string_string_map,
            contig_string_map,
        };

        assert_eq!(s.parse(), Ok(expected));
    }

    // An ID declared at a position other than the one it already has is an error.
    #[test]
    fn test_from_str_with_a_position_mismatch() {
        // "PASS" is in the default string map at position 0, so IDX=8 conflicts.
        let s = r#"##fileformat=VCFv4.3
##FILTER=<ID=PASS,Description="All filters passed",IDX=8>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample0
"#;

        assert_eq!(
            s.parse::<StringMaps>(),
            Err(ParseError::StringMapPositionMismatch(
                (8, String::from("PASS")),
                (0, String::from("PASS"))
            ))
        );

        // "DP" is first placed at position 1 by the INFO record, so the FORMAT record's
        // IDX=2 conflicts.
        let s = r#"##fileformat=VCFv4.3
##INFO=<ID=DP,Number=1,Type=Integer,Description="Combined depth across samples",IDX=1>
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth",IDX=2>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample0
"#;

        assert_eq!(
            s.parse::<StringMaps>(),
            Err(ParseError::StringMapPositionMismatch(
                (2, String::from("DP")),
                (1, String::from("DP"))
            ))
        );
    }

    // Conversion from a `Header` whose records are all added without setting an IDX.
    #[test]
    fn test_try_from_vcf_header_for_string_maps() -> Result<(), Box<dyn std::error::Error>> {
        use crate::{
            header::record::value::{
                map::{AlternativeAllele, Contig, Filter, Format, Info},
                Map,
            },
            variant::record::{info, samples},
        };

        let header = Header::builder()
            .add_contig("sq0", Map::<Contig>::new())
            .add_contig("sq1", Map::<Contig>::new())
            .add_contig("sq2", Map::<Contig>::new())
            .add_info(
                info::field::key::SAMPLES_WITH_DATA_COUNT,
                Map::<Info>::from(info::field::key::SAMPLES_WITH_DATA_COUNT),
            )
            .add_info(
                info::field::key::TOTAL_DEPTH,
                Map::<Info>::from(info::field::key::TOTAL_DEPTH),
            )
            .add_filter("PASS", Map::<Filter>::pass())
            .add_filter("q10", Map::<Filter>::new("Quality below 10"))
            .add_format(
                samples::keys::key::GENOTYPE,
                Map::<Format>::from(samples::keys::key::GENOTYPE),
            )
            .add_format(
                samples::keys::key::READ_DEPTH,
                Map::<Format>::from(samples::keys::key::READ_DEPTH),
            )
            .add_alternative_allele("DEL", Map::<AlternativeAllele>::new("Deletion"))
            .build();

        let actual = StringMaps::try_from(&header)?;

        // The conversion visits infos, then filters, then formats, so the INFO IDs follow
        // "PASS" directly. The alternative allele contributes nothing.
        let string_string_map = StringMap {
            indices: [
                (String::from("PASS"), 0),
                (String::from("NS"), 1),
                (String::from("DP"), 2),
                (String::from("q10"), 3),
                (String::from("GT"), 4),
            ]
            .into_iter()
            .collect(),
            entries: vec![
                Some(String::from("PASS")),
                Some(String::from("NS")),
                Some(String::from("DP")),
                Some(String::from("q10")),
                Some(String::from("GT")),
            ],
        };

        let contig_string_map = StringMap {
            indices: [
                (String::from("sq0"), 0),
                (String::from("sq1"), 1),
                (String::from("sq2"), 2),
            ]
            .into_iter()
            .collect(),
            entries: vec![
                Some(String::from("sq0")),
                Some(String::from("sq1")),
                Some(String::from("sq2")),
            ],
        };

        let expected = StringMaps {
            string_string_map,
            contig_string_map,
        };

        assert_eq!(actual, expected);

        Ok(())
    }

    // Conversion from a `Header` that mixes records with and without an explicit IDX.
    #[test]
    fn test_try_from_vcf_header_for_string_maps_with_idx() -> Result<(), Box<dyn std::error::Error>>
    {
        use crate::{
            header::record::value::{
                map::{Filter, Info},
                Map,
            },
            variant::record::info,
        };

        // An INFO record for "NS" with its IDX set to 1.
        let ns = {
            let mut map = Map::<Info>::from(info::field::key::SAMPLES_WITH_DATA_COUNT);
            *map.idx_mut() = Some(1);
            map
        };

        let header = Header::builder()
            .add_filter(
                "PASS",
                Map::<Filter>::builder()
                    .set_description("All filters passed")
                    .set_idx(0)
                    .build()?,
            )
            .add_filter(
                "q10",
                Map::<Filter>::builder()
                    .set_description("Quality below 10")
                    .set_idx(3)
                    .build()?,
            )
            .add_filter(
                "q15",
                Map::<Filter>::builder()
                    .set_description("Quality below 15")
                    .set_idx(4)
                    .build()?,
            )
            .add_filter("q20", Map::<Filter>::new("Quality below 20"))
            .add_filter("NS", Map::<Filter>::new(""))
            .add_info(info::field::key::SAMPLES_WITH_DATA_COUNT, ns)
            .build();

        let actual = StringMaps::try_from(&header)?;

        // No record claims position 2, so it is expected to stay empty. "q20" has no IDX
        // and is expected after the last entry. The FILTER "NS" has no IDX and reuses the
        // entry created by the INFO "NS".
        let string_string_map = StringMap {
            indices: [
                (String::from("PASS"), 0),
                (String::from("NS"), 1),
                (String::from("q10"), 3),
                (String::from("q15"), 4),
                (String::from("q20"), 5),
            ]
            .into_iter()
            .collect(),
            entries: vec![
                Some(String::from("PASS")),
                Some(String::from("NS")),
                None,
                Some(String::from("q10")),
                Some(String::from("q15")),
                Some(String::from("q20")),
            ],
        };

        let contig_string_map = StringMap::default();

        let expected = StringMaps {
            string_string_map,
            contig_string_map,
        };

        assert_eq!(actual, expected);

        Ok(())
    }

    #[test]
    fn test_parse_file_format() {
        // A valid file format record.
        let s = "##fileformat=VCFv4.3\n";
        let mut lines = s.lines();
        assert_eq!(parse_file_format(&mut lines), Ok(FileFormat::new(4, 3)));

        // No lines at all.
        let s = "";
        let mut lines = s.lines();
        assert_eq!(
            parse_file_format(&mut lines),
            Err(ParseError::MissingFileFormat)
        );

        // A valid record that is not the file format record.
        let s = "##fileDate=20211119";
        let mut lines = s.lines();
        assert_eq!(
            parse_file_format(&mut lines),
            Err(ParseError::MissingFileFormat)
        );

        // A line without the `##` prefix.
        let s = "fileformat=VCFv4.3";
        let mut lines = s.lines();
        assert!(matches!(
            parse_file_format(&mut lines),
            Err(ParseError::InvalidRecord(_))
        ));

        // A file format record with a malformed version.
        let s = "##fileformat=VCF43\n";
        let mut lines = s.lines();
        assert!(matches!(
            parse_file_format(&mut lines),
            Err(ParseError::InvalidRecord(_))
        ));
    }
}