noodles_vcf/record/samples/series/
value.rs

1//! VCF record samples series value.
2
3mod genotype;
4
5use std::io;
6
7pub use self::genotype::Genotype;
8use crate::{
9    io::reader::record_buf::value::percent_decode,
10    variant::record::samples::{
11        keys::key,
12        series::{value::Array, Value},
13    },
14    Header,
15};
16
17pub(crate) fn parse_value<'a>(
18    src: &'a str,
19    header: &Header,
20    key: &str,
21) -> io::Result<Option<Value<'a>>> {
22    use crate::header::record::value::map::format::{definition::definition, Number, Type};
23
24    const MISSING: &str = ".";
25
26    if src == MISSING {
27        return Ok(None);
28    } else if key == key::GENOTYPE {
29        return Ok(Some(parse_genotype_value(src)));
30    }
31
32    let (number, ty) = header
33        .formats()
34        .get(key)
35        .map(|format| (format.number(), format.ty()))
36        .or_else(|| definition(header.file_format(), key).map(|(n, t, _)| (n, t)))
37        .unwrap_or_default();
38
39    let value = match (number, ty) {
40        (Number::Count(0), _) => {
41            return Err(io::Error::new(
42                io::ErrorKind::InvalidData,
43                "invalid number for type",
44            ))
45        }
46        (Number::Count(1), Type::Integer) => parse_integer_value(src)?,
47        (Number::Count(1), Type::Float) => parse_float_value(src)?,
48        (Number::Count(1), Type::Character) => parse_character_value(src)?,
49        (Number::Count(1), Type::String) => parse_string_value(src)?,
50        (_, Type::Integer) => parse_integer_array_value(src),
51        (_, Type::Float) => parse_float_array_value(src),
52        (_, Type::Character) => parse_character_array_value(src),
53        (_, Type::String) => parse_string_array_value(src),
54    };
55
56    Ok(Some(value))
57}
58
59fn parse_integer_value(src: &str) -> io::Result<Value<'_>> {
60    src.parse()
61        .map(Value::Integer)
62        .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
63}
64
65fn parse_float_value(src: &str) -> io::Result<Value<'_>> {
66    src.parse()
67        .map(Value::Float)
68        .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
69}
70
71fn parse_character_value(src: &str) -> io::Result<Value<'_>> {
72    let s = percent_decode(src).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
73    let mut chars = s.chars();
74
75    if let Some(c) = chars.next() {
76        if chars.next().is_none() {
77            return Ok(Value::Character(c));
78        }
79    }
80
81    Err(io::Error::new(
82        io::ErrorKind::InvalidData,
83        "invalid character",
84    ))
85}
86
87fn parse_string_value(src: &str) -> io::Result<Value<'_>> {
88    percent_decode(src)
89        .map(Value::String)
90        .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
91}
92
93fn parse_genotype_value(src: &str) -> Value<'_> {
94    Value::Genotype(Box::new(Genotype::new(src)))
95}
96
97fn parse_integer_array_value(src: &str) -> Value<'_> {
98    Value::Array(Array::Integer(Box::new(src)))
99}
100
101fn parse_float_array_value(src: &str) -> Value<'_> {
102    Value::Array(Array::Float(Box::new(src)))
103}
104
105fn parse_character_array_value(src: &str) -> Value<'_> {
106    Value::Array(Array::Character(Box::new(src)))
107}
108
109fn parse_string_array_value(src: &str) -> Value<'_> {
110    Value::Array(Array::String(Box::new(src)))
111}
112
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        header::record::value::{
            map::{
                format::{Number, Type},
                Format,
            },
            Map,
        },
        variant::{
            record::samples::series::value::genotype::Phasing,
            record_buf::samples::sample::{value::genotype::Allele, Value as ValueBuf},
        },
    };

    /// Parses `src` under `key`, converts any resulting value into its buffered form, and
    /// asserts that it equals `expected`.
    fn check(
        src: &str,
        header: &Header,
        key: &str,
        expected: Option<ValueBuf>,
    ) -> io::Result<()> {
        let actual = match parse_value(src, header, key)? {
            Some(value) => Some(ValueBuf::try_from(value)?),
            None => None,
        };

        assert_eq!(actual, expected);

        Ok(())
    }

    #[test]
    fn test_parse_value() -> io::Result<()> {
        #[rustfmt::skip]
        let header = Header::builder()
            .add_format("I32", Map::<Format>::new(Number::Count(1), Type::Integer, ""))
            .add_format("F32", Map::<Format>::new(Number::Count(1), Type::Float, ""))
            .add_format("CHAR", Map::<Format>::new(Number::Count(1), Type::Character, ""))
            .add_format("STRING", Map::<Format>::new(Number::Count(1), Type::String, ""))
            .add_format("I32_ARRAY", Map::<Format>::new(Number::Count(2), Type::Integer, ""))
            .add_format("F32_ARRAY", Map::<Format>::new(Number::Count(2), Type::Float, ""))
            .add_format("CHAR_ARRAY", Map::<Format>::new(Number::Count(2), Type::Character, ""))
            .add_format("STRING_ARRAY", Map::<Format>::new(Number::Count(2), Type::String, ""))
            .add_format("I32_INVALID", Map::<Format>::new(Number::Count(0), Type::Integer, ""))
            .build();

        // The missing marker yields no value, whatever the key's declared number and type.
        let keys = [
            "I32",
            "F32",
            "CHAR",
            "STRING",
            "I32_ARRAY",
            "F32_ARRAY",
            "CHAR_ARRAY",
            "STRING_ARRAY",
            key::GENOTYPE,
        ];

        for k in keys {
            check(".", &header, k, None)?;
        }

        // Single values.
        check("8", &header, "I32", Some(ValueBuf::from(8)))?;
        check("0", &header, "F32", Some(ValueBuf::from(0.0)))?;
        check("n", &header, "CHAR", Some(ValueBuf::from('n')))?;
        check("ndls", &header, "STRING", Some(ValueBuf::from("ndls")))?;

        // Array values, each with a missing second element.
        let expected = ValueBuf::from(vec![Some(8), None]);
        check("8,.", &header, "I32_ARRAY", Some(expected))?;

        let expected = ValueBuf::from(vec![Some(0.0), None]);
        check("0,.", &header, "F32_ARRAY", Some(expected))?;

        let expected = ValueBuf::from(vec![Some('n'), None]);
        check("n,.", &header, "CHAR_ARRAY", Some(expected))?;

        let expected = ValueBuf::from(vec![Some(String::from("n")), None]);
        check("n,.", &header, "STRING_ARRAY", Some(expected))?;

        // Genotype.
        let alleles = [
            Allele::new(Some(0), Phasing::Unphased),
            Allele::new(Some(0), Phasing::Unphased),
        ];
        let expected = ValueBuf::Genotype(alleles.into_iter().collect());
        check("0/0", &header, key::GENOTYPE, Some(expected))?;

        // A declared count of zero is rejected.
        let kind = parse_value("0", &header, "I32_INVALID")
            .err()
            .map(|e| e.kind());
        assert_eq!(kind, Some(io::ErrorKind::InvalidData));

        Ok(())
    }
}