noodles_vcf/record/
samples.rs

1//! VCF record samples.
2
3mod keys;
4mod sample;
5pub mod series;
6
7use std::{io, iter};
8
9pub use self::{keys::Keys, sample::Sample, series::Series};
10use crate::Header;
11
/// The tab character on which the raw buffer is split into the keys and the remaining sample
/// text.
const DELIMITER: char = '\t';
13
/// Raw VCF record genotypes.
///
/// This is a thin wrapper around a borrowed string. The text before the first tab is treated as
/// the keys, and the tab-separated entries after it are treated as the samples.
#[derive(Debug, Eq, PartialEq)]
pub struct Samples<'r>(&'r str);
17
18impl<'r> Samples<'r> {
19    pub(super) fn new(buf: &'r str) -> Self {
20        Self(buf)
21    }
22
23    /// Returns whether there may be any genotypes.
24    pub fn is_empty(&self) -> bool {
25        self.0.is_empty()
26    }
27
28    /// Returns the keys.
29    pub fn keys(&self) -> Keys<'r> {
30        let (src, _) = self.0.split_once(DELIMITER).unwrap_or_default();
31        Keys::new(src)
32    }
33
34    /// Returns the sample with the given sample name.
35    pub fn get(&self, header: &Header, sample_name: &str) -> Option<Sample<'r>> {
36        header
37            .sample_names()
38            .get_index_of(sample_name)
39            .and_then(|i| self.get_index(i))
40    }
41
42    /// Returns the sample at the given index.
43    pub fn get_index(&self, i: usize) -> Option<Sample<'r>> {
44        self.iter().nth(i)
45    }
46
47    /// Returns the series with the given column name.
48    pub fn select(&'r self, column_name: &str) -> Option<Series<'r>> {
49        self.keys()
50            .iter()
51            .enumerate()
52            .find(|(_, key)| *key == column_name)
53            .map(|(i, key)| Series::new(key, self, i))
54    }
55
56    /// Returns an iterator over series.
57    pub fn series(&'r self) -> impl Iterator<Item = Series<'r>> + 'r {
58        self.keys()
59            .iter()
60            .enumerate()
61            .map(|(i, key)| Series::new(key, self, i))
62    }
63
64    /// Returns an iterator over samples.
65    pub fn iter(&self) -> impl Iterator<Item = Sample<'r>> + '_ {
66        let (_, mut src) = self.0.split_once(DELIMITER).unwrap_or_default();
67
68        iter::from_fn(move || {
69            if src.is_empty() {
70                None
71            } else {
72                Some(parse_sample(&mut src, self.keys()))
73            }
74        })
75    }
76}
77
78impl AsRef<str> for Samples<'_> {
79    fn as_ref(&self) -> &str {
80        self.0
81    }
82}
83
84impl crate::variant::record::Samples for Samples<'_> {
85    fn is_empty(&self) -> bool {
86        self.0.is_empty()
87    }
88
89    fn len(&self) -> usize {
90        self.iter().count()
91    }
92
93    fn column_names<'a, 'h: 'a>(
94        &'a self,
95        _: &'h Header,
96    ) -> Box<dyn Iterator<Item = io::Result<&'a str>> + 'a> {
97        Box::new(self.keys().iter().map(Ok))
98    }
99
100    fn select<'a, 'h: 'a>(
101        &'a self,
102        _: &'h Header,
103        column_name: &str,
104    ) -> Option<io::Result<Box<dyn crate::variant::record::samples::Series + 'a>>> {
105        self.select(column_name)
106            .map(|series| Box::new(series) as Box<dyn crate::variant::record::samples::Series>)
107            .map(Ok)
108    }
109
110    fn series(
111        &self,
112    ) -> Box<
113        dyn Iterator<Item = io::Result<Box<dyn crate::variant::record::samples::Series + '_>>> + '_,
114    > {
115        Box::new(
116            self.series()
117                .map(|series| Box::new(series) as Box<dyn crate::variant::record::samples::Series>)
118                .map(Ok),
119        )
120    }
121
122    fn iter(
123        &self,
124    ) -> Box<dyn Iterator<Item = Box<dyn crate::variant::record::samples::Sample + '_>> + '_> {
125        Box::new(
126            self.iter()
127                .map(|sample| Box::new(sample) as Box<dyn crate::variant::record::samples::Sample>),
128        )
129    }
130}
131
132fn parse_sample<'r>(src: &mut &'r str, keys: Keys<'r>) -> Sample<'r> {
133    const DELIMITER: u8 = b'\t';
134    const MISSING: &str = ".";
135
136    let buf = match src.as_bytes().iter().position(|&b| b == DELIMITER) {
137        Some(i) => {
138            let (buf, rest) = src.split_at(i);
139            *src = &rest[1..];
140            buf
141        }
142        None => {
143            let (buf, rest) = src.split_at(src.len());
144            *src = rest;
145            buf
146        }
147    };
148
149    if buf == MISSING {
150        Sample::new("", keys)
151    } else {
152        Sample::new(buf, keys)
153    }
154}
155
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_is_empty() {
        let empty = Samples::new("");
        assert!(empty.is_empty());

        let populated = Samples::new("GT:GQ\t0|0:13");
        assert!(!populated.is_empty());
    }

    #[test]
    fn test_get() {
        // Three sample names in the header, matching the three samples in the record.
        let header = Header::builder()
            .add_sample_name("sample0")
            .add_sample_name("sample1")
            .add_sample_name("sample2")
            .build();

        let samples = Samples::new("GT\t0|0\t1/1\t.");

        assert_eq!(
            samples.get(&header, "sample0"),
            Some(Sample::new("0|0", samples.keys()))
        );

        // A name that is absent from the header resolves to nothing.
        let missing = samples.get(&header, "sample3");
        assert!(missing.is_none());
    }

    #[test]
    fn test_get_index() {
        let samples = Samples::new("GT\t0|0\t1/1\t.");

        assert_eq!(
            samples.get_index(0),
            Some(Sample::new("0|0", samples.keys()))
        );

        // Only indices 0 through 2 exist.
        let out_of_range = samples.get_index(3);
        assert!(out_of_range.is_none());
    }

    #[test]
    fn test_select() {
        use crate::variant::record::samples::keys::key;

        let empty = Samples::new("");
        assert!(empty.select(key::CONDITIONAL_GENOTYPE_QUALITY).is_none());

        let populated = Samples::new("GT:GQ\t0|0:13\t.");
        assert!(populated
            .select(key::CONDITIONAL_GENOTYPE_QUALITY)
            .is_some());
    }

    #[test]
    fn test_iter() {
        let empty = Samples::new("");
        assert!(empty.iter().next().is_none());

        let samples = Samples::new("GT:GQ\t0|0:13\t.");
        let mut iter = samples.iter();

        assert_eq!(iter.next(), Some(Sample::new("0|0:13", samples.keys())));
        // A missing (`.`) sample is yielded with an empty source.
        assert_eq!(iter.next(), Some(Sample::new("", samples.keys())));
        assert!(iter.next().is_none());
    }
}