noodles_vcf/variant/
record.rs

1//! Variant record.
2
3mod alternate_bases;
4mod filters;
5mod ids;
6pub mod info;
7mod reference_bases;
8pub mod samples;
9
10use std::io;
11
12use noodles_core::Position;
13
14pub use self::{
15    alternate_bases::AlternateBases, filters::Filters, ids::Ids, info::Info,
16    reference_bases::ReferenceBases, samples::Samples,
17};
18use crate::Header;
19
20/// A variant record.
21pub trait Record {
22    /// Returns the reference sequence name.
23    fn reference_sequence_name<'a, 'h: 'a>(&'a self, header: &'h Header) -> io::Result<&'a str>;
24
25    /// Returns the variant start position.
26    ///
27    /// This position is 1-based, inclusive.
28    fn variant_start(&self) -> Option<io::Result<Position>>;
29
30    /// Returns the IDs.
31    fn ids(&self) -> Box<dyn Ids + '_>;
32
33    /// Returns the reference bases.
34    fn reference_bases(&self) -> Box<dyn ReferenceBases + '_>;
35
36    /// Returns the alternate bases.
37    fn alternate_bases(&self) -> Box<dyn AlternateBases + '_>;
38
39    /// Returns the quality scores.
40    fn quality_score(&self) -> Option<io::Result<f32>>;
41
42    /// Returns the filters.
43    fn filters(&self) -> Box<dyn Filters + '_>;
44
45    /// Return the info fields.
46    fn info(&self) -> Box<dyn Info + '_>;
47
48    /// Returns the samples.
49    fn samples(&self) -> io::Result<Box<dyn Samples + '_>>;
50
51    /// Returns the variant span.
52    fn variant_span(&self, header: &Header) -> io::Result<usize> {
53        let start = self.variant_start().transpose()?.unwrap_or(Position::MIN);
54        let end = self.variant_end(header)?;
55        Ok(usize::from(end) - usize::from(start) + 1)
56    }
57
58    /// Returns or calculates the variant end position.
59    ///
60    /// If available, this returns the value of the `END` INFO field. Otherwise, it is calculated
61    /// using the [variant start position] and [reference bases length].
62    ///
63    /// This position is 1-based, inclusive.
64    ///
65    /// [variant start position]: `Self::variant_start`
66    /// [reference bases length]: `ReferenceBases::len`
67    fn variant_end(&self, header: &Header) -> io::Result<Position> {
68        if let Some(position) = info_end(header, &self.info()).transpose()? {
69            Ok(position)
70        } else {
71            let start = self.variant_start().transpose()?.unwrap_or(Position::MIN);
72
73            let mut max_len = reference_bases_len(&self.reference_bases())?;
74
75            if let Some(Some(len)) = info_max_sv_len(header, &self.info()).transpose()? {
76                max_len = max_len.max(len);
77            }
78
79            let samples = self.samples()?;
80            if let Some(Some(len)) = samples_max_len(header, &samples).transpose()? {
81                max_len = max_len.max(len);
82            }
83
84            start
85                .checked_add(max_len - 1)
86                .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "position overflow"))
87        }
88    }
89}
90
91fn reference_bases_len<B>(reference_bases: &B) -> io::Result<usize>
92where
93    B: ReferenceBases,
94{
95    if reference_bases.is_empty() {
96        Err(io::Error::new(
97            io::ErrorKind::InvalidData,
98            "invalid reference bases length",
99        ))
100    } else {
101        Ok(reference_bases.len())
102    }
103}
104
105fn info_end<I>(header: &Header, info: &I) -> Option<io::Result<Position>>
106where
107    I: Info,
108{
109    use self::info::field::{key, Value};
110
111    let value = match info.get(header, key::END_POSITION).transpose() {
112        Ok(value) => value??,
113        Err(e) => return Some(Err(e)),
114    };
115
116    match value {
117        Value::Integer(n) => Some(
118            usize::try_from(n)
119                .and_then(Position::try_from)
120                .map_err(|_| {
121                    io::Error::new(io::ErrorKind::InvalidData, "invalid INFO END position")
122                }),
123        ),
124        _ => Some(Err(io::Error::new(
125            io::ErrorKind::InvalidData,
126            "invalid INFO END position value",
127        ))),
128    }
129}
130
131fn info_max_sv_len<I>(header: &Header, info: &I) -> Option<io::Result<Option<usize>>>
132where
133    I: Info,
134{
135    use self::info::field::{key, value::Array, Value};
136
137    let value = match info.get(header, key::SV_LENGTHS).transpose() {
138        Ok(value) => value??,
139        Err(e) => return Some(Err(e)),
140    };
141
142    let mut max_len: Option<usize> = None;
143
144    match value {
145        Value::Array(Array::Integer(values)) => {
146            for result in values.iter() {
147                match result {
148                    Ok(Some(n)) => {
149                        let len = match usize::try_from(n) {
150                            Ok(len) => len,
151                            Err(_) => {
152                                return Some(Err(io::Error::new(
153                                    io::ErrorKind::InvalidData,
154                                    "invalid INFO SVLEN value",
155                                )))
156                            }
157                        };
158
159                        max_len = max_len.map(|n| n.max(len)).or(Some(len));
160                    }
161                    Ok(None) => {}
162                    Err(e) => return Some(Err(e)),
163                }
164            }
165        }
166        _ => {
167            return Some(Err(io::Error::new(
168                io::ErrorKind::InvalidData,
169                "invalid INFO SVLEN position value",
170            )))
171        }
172    }
173
174    Some(Ok(max_len))
175}
176
177fn samples_max_len<S>(header: &Header, samples: &S) -> Option<io::Result<Option<usize>>>
178where
179    S: Samples,
180{
181    use self::samples::{keys::key, series::Value};
182
183    let series = match samples.select(header, key::LENGTH)? {
184        Ok(series) => series,
185        Err(e) => return Some(Err(e)),
186    };
187
188    let mut max_len: Option<usize> = None;
189
190    for result in series.iter(header) {
191        let value = match result {
192            Ok(value) => value,
193            Err(e) => return Some(Err(e)),
194        };
195
196        match value {
197            Some(Value::Integer(n)) => {
198                let len = match usize::try_from(n) {
199                    Ok(len) => len,
200                    Err(_) => {
201                        return Some(Err(io::Error::new(
202                            io::ErrorKind::InvalidData,
203                            "invalid FORMAT LEN value",
204                        )))
205                    }
206                };
207
208                max_len = max_len.map(|n| n.max(len)).or(Some(len));
209            }
210            Some(_) => {
211                return Some(Err(io::Error::new(
212                    io::ErrorKind::InvalidData,
213                    "invalid FORMAT LEN type",
214                )))
215            }
216            None => {}
217        }
218    }
219
220    Some(Ok(max_len))
221}
222
#[cfg(test)]
mod tests {
    use super::*;
    use crate::variant::RecordBuf;

    /// Builds a record with only the `END` INFO field set explicitly.
    fn build_record_with_info_end(end: i32) -> RecordBuf {
        use crate::variant::{record::info::field::key, record_buf::info::field::Value};

        RecordBuf::builder()
            .set_info(
                [(String::from(key::END_POSITION), Some(Value::from(end)))]
                    .into_iter()
                    .collect(),
            )
            .build()
    }

    /// Builds a record with only the reference bases (`ACGT`) set explicitly.
    fn build_record_with_reference_bases() -> RecordBuf {
        RecordBuf::builder().set_reference_bases("ACGT").build()
    }

    #[test]
    fn test_variant_span() -> io::Result<()> {
        let header = Header::default();

        // The span is taken from the `END` INFO field when it is present.
        let with_end = build_record_with_info_end(8);
        assert_eq!(with_end.variant_span(&header)?, 8);

        // Otherwise it falls back to the reference bases length.
        let with_bases = build_record_with_reference_bases();
        assert_eq!(with_bases.variant_span(&header)?, 4);

        Ok(())
    }

    #[test]
    fn test_variant_end() -> Result<(), Box<dyn std::error::Error>> {
        let header = Header::default();

        let with_end = build_record_with_info_end(8);
        let actual = Record::variant_end(&with_end, &header)?;
        assert_eq!(actual, Position::try_from(8)?);

        let with_bases = build_record_with_reference_bases();
        let actual = Record::variant_end(&with_bases, &header)?;
        assert_eq!(actual, Position::try_from(4)?);

        Ok(())
    }

    #[test]
    fn test_variant_end_with_info_sv_len() -> Result<(), Box<dyn std::error::Error>> {
        use crate::variant::{record::info::field::key, record_buf::info::field::Value};

        let header = Header::default();

        // The largest SVLEN entry (8) exceeds the reference bases length (4).
        let sv_lengths = Value::from(vec![None, Some(5), Some(8)]);

        let record = RecordBuf::builder()
            .set_reference_bases("ACGT")
            .set_info(
                [(String::from(key::SV_LENGTHS), Some(sv_lengths))]
                    .into_iter()
                    .collect(),
            )
            .build();

        let actual = Record::variant_end(&record, &header)?;
        assert_eq!(actual, Position::try_from(8)?);

        Ok(())
    }

    #[test]
    fn test_variant_end_with_samples_len() -> Result<(), Box<dyn std::error::Error>> {
        use crate::variant::{
            record::samples::keys::key,
            record_buf::{samples::sample::Value, Samples},
        };

        let header = Header::default();

        // A single sample whose LEN value (8) exceeds the reference bases length (4).
        let keys = [String::from(key::LENGTH)].into_iter().collect();
        let values = vec![vec![Some(Value::from(8))]];
        let samples = Samples::new(keys, values);

        let record = RecordBuf::builder()
            .set_reference_bases("ACGT")
            .set_samples(samples)
            .build();

        let actual = Record::variant_end(&record, &header)?;
        assert_eq!(actual, Position::try_from(8)?);

        Ok(())
    }
}