noodles_sam/header/
programs.rs

1use std::io;
2
3use bstr::{BStr, BString, ByteVec};
4use indexmap::{IndexMap, IndexSet};
5
6use super::record::value::map::{program::tag, Map, Program};
7
8type Inner = IndexMap<BString, Map<Program>>;
9
10#[allow(clippy::tabs_in_doc_comments)]
11/// SAM header programs.
12///
13/// SAM header programs are header records that form program chains. A program chain is a directed
14/// acyclic graph (DAG) starting at a root program and ending at a leaf program. Program edges are
15/// directed forward from a parent program to its linked program using the previous program ID
16/// (`PP`) field in the child program.
17///
18/// For example, take the following program records:
19///
20/// ```text
21/// @PG	ID:pg0
22/// @PG	ID:pg1	PP:pg0
23/// ```
24///
25/// This creates the program chain `pg0 -> pg1`.
26///
27/// There can exist more than one program chain in the SAM header programs, e.g.,
28///
29/// ```text
30/// @PG	ID:pg0
31/// @PG	ID:pg1
32/// @PG	ID:pg2
33/// @PG	ID:pg3	PP:pg0
34/// @PG	ID:pg4	PP:pg1
35/// @PG	ID:pg5	PP:pg4
36/// ```
37///
38/// This creates the program chains `pg0 -> pg3`, `pg1 -> pg4 -> pg5`, and `pg2`.
39#[derive(Clone, Debug, Default, Eq, PartialEq)]
40pub struct Programs(Inner);
41
42impl Programs {
43    /// Adds a program.
44    ///
45    /// If the program is the first program in the graph, this is similar to calling
46    /// `IndexMap::insert` on the inner graph. If no previous program is set, this attaches the
47    /// program to all program chains using leaf programs as the given program's previous program.
48    ///
49    /// # Examples
50    ///
51    /// ```
52    /// use noodles_sam::{
53    ///     self as sam,
54    ///     header::record::value::{map::{program::tag, Program}, Map},
55    /// };
56    ///
57    /// let mut header = sam::Header::default();
58    /// let programs = header.programs_mut();
59    ///
60    /// programs.add("pg0", Map::default())?;
61    /// programs.add("pg1", Map::default())?;
62    ///
63    /// let expected = sam::Header::builder()
64    ///     .add_program("pg0", Map::default())
65    ///     .add_program("pg1", Map::builder().insert(tag::PREVIOUS_PROGRAM_ID, "pg0").build()?)
66    ///     .build();
67    ///
68    /// assert_eq!(programs, expected.programs());
69    /// # Ok::<_, Box<dyn std::error::Error>>(())
70    /// ```
71    pub fn add<P>(&mut self, id_prefix: P, map: Map<Program>) -> io::Result<()>
72    where
73        P: Into<BString>,
74    {
75        const SEPARATOR: u8 = b'-';
76
77        let id_prefix = id_prefix.into();
78
79        if self.0.is_empty() {
80            self.0.insert(id_prefix, map);
81            return Ok(());
82        }
83
84        let previous_program_ids: Vec<BString> = self.leaves()?.map(|(id, _)| id.into()).collect();
85        let contains_prefix_id = self.0.contains_key(&id_prefix);
86
87        for (i, previous_program_id) in previous_program_ids.into_iter().enumerate() {
88            let mut id = id_prefix.clone();
89
90            if i > 0 || contains_prefix_id {
91                id.push_byte(SEPARATOR);
92                id.push_str(&previous_program_id);
93
94                if self.0.contains_key(&id) {
95                    return Err(io::Error::new(io::ErrorKind::InvalidInput, "duplicate ID"));
96                }
97            }
98
99            let mut map = map.clone();
100
101            map.other_fields_mut()
102                .entry(tag::PREVIOUS_PROGRAM_ID)
103                .or_insert(previous_program_id);
104
105            self.0.insert(id, map);
106        }
107
108        Ok(())
109    }
110
111    /// Returns an iterator over root programs.
112    ///
113    /// A root program is a first program of a program chain.
114    ///
115    /// # Examples
116    ///
117    /// ```
118    /// use noodles_sam::{
119    ///     self as sam,
120    ///     header::record::value::{map::{program::tag, Program}, Map},
121    /// };
122    ///
123    /// let header = sam::Header::builder()
124    ///     .add_program("pg0", Map::default())
125    ///     .add_program("pg1", Map::builder().insert(tag::PREVIOUS_PROGRAM_ID, "pg0").build()?)
126    ///     .add_program("pg2", Map::builder().insert(tag::PREVIOUS_PROGRAM_ID, "pg1").build()?)
127    ///     .add_program("pg3", Map::default())
128    ///     .build();
129    ///
130    /// let mut roots = header.programs().roots();
131    /// assert_eq!(roots.next().map(|(id, _)| id.as_ref()), Some(&b"pg0"[..]));
132    /// assert_eq!(roots.next().map(|(id, _)| id.as_ref()), Some(&b"pg3"[..]));
133    /// assert!(roots.next().is_none());
134    /// # Ok::<_, sam::header::record::value::map::builder::BuildError>(())
135    /// ```
136    pub fn roots(&self) -> impl Iterator<Item = (&BStr, &Map<Program>)> {
137        self.0
138            .iter()
139            .filter(|(_, map)| !map.other_fields().contains_key(&tag::PREVIOUS_PROGRAM_ID))
140            .map(|(id, map)| (id.as_ref(), map))
141    }
142
143    /// Returns an iterator over leaf programs.
144    ///
145    /// A leaf program is the last program of a program chain.
146    ///
147    /// # Errors
148    ///
149    /// This returns an `io::Error` if any program chain has a cycle.
150    ///
151    /// # Examples
152    ///
153    /// ```
154    /// use noodles_sam::{
155    ///     self as sam,
156    ///     header::record::value::{map::{program::tag, Program}, Map},
157    /// };
158    ///
159    /// let header = sam::Header::builder()
160    ///     .add_program("pg0", Map::default())
161    ///     .add_program("pg1", Map::builder().insert(tag::PREVIOUS_PROGRAM_ID, "pg0").build()?)
162    ///     .add_program("pg2", Map::builder().insert(tag::PREVIOUS_PROGRAM_ID, "pg1").build()?)
163    ///     .add_program("pg3", Map::default())
164    ///     .build();
165    ///
166    /// let mut leaves = header.programs().leaves()?;
167    /// assert_eq!(leaves.next().map(|(id, _)| id.as_ref()), Some(&b"pg3"[..]));
168    /// assert_eq!(leaves.next().map(|(id, _)| id.as_ref()), Some(&b"pg2"[..]));
169    /// assert!(leaves.next().is_none());
170    /// # Ok::<_, Box<dyn std::error::Error>>(())
171    /// ```
172    pub fn leaves(&self) -> io::Result<impl Iterator<Item = (&BStr, &Map<Program>)>> {
173        let mut ids: IndexSet<_> = self.0.keys().collect();
174
175        for (id, map) in &self.0 {
176            if let Some(previous_program_id) = map.other_fields().get(&tag::PREVIOUS_PROGRAM_ID) {
177                if has_cycle(&self.0, previous_program_id.as_ref(), id.as_ref()) {
178                    return Err(io::Error::new(io::ErrorKind::InvalidData, "cycle detected"));
179                }
180
181                ids.swap_remove(previous_program_id);
182            }
183        }
184
185        Ok(ids.into_iter().map(|id| {
186            // SAFETY: `id` is guaranteed to be in the set of keys.
187            self.0
188                .get_key_value(id)
189                .map(|(i, map)| (i.as_ref(), map))
190                .unwrap()
191        }))
192    }
193}
194
195fn has_cycle<'a>(graph: &'a Inner, mut parent_id: &'a BStr, node_id: &'a BStr) -> bool {
196    loop {
197        // SAFETY: `parent_id` is guaranteed to be in the graph.
198        let node = &graph[parent_id];
199
200        if let Some(previous_program_id) = node.other_fields().get(&tag::PREVIOUS_PROGRAM_ID) {
201            parent_id = previous_program_id.as_ref();
202
203            if parent_id == node_id {
204                return true;
205            }
206        } else {
207            return false;
208        }
209    }
210}
211
212impl AsRef<Inner> for Programs {
213    fn as_ref(&self) -> &Inner {
214        &self.0
215    }
216}
217
218impl AsMut<Inner> for Programs {
219    fn as_mut(&mut self) -> &mut Inner {
220        &mut self.0
221    }
222}
223
224#[cfg(test)]
225mod tests {
226    use super::*;
227    use crate::Header;
228
229    #[test]
230    fn test_add() -> Result<(), Box<dyn std::error::Error>> {
231        let mut programs = Programs::default();
232        assert!(programs.as_ref().is_empty());
233
234        programs.add("pg0", Map::default())?;
235        let expected = Programs(
236            [(BString::from("pg0"), Map::default())]
237                .into_iter()
238                .collect(),
239        );
240        assert_eq!(programs, expected);
241
242        programs.add("pg1", Map::default())?;
243        let expected = Programs(
244            [
245                (BString::from("pg0"), Map::default()),
246                (
247                    BString::from("pg1"),
248                    Map::builder()
249                        .insert(tag::PREVIOUS_PROGRAM_ID, "pg0")
250                        .build()?,
251                ),
252            ]
253            .into_iter()
254            .collect(),
255        );
256        assert_eq!(programs, expected);
257
258        programs
259            .as_mut()
260            .insert(BString::from("pg2"), Map::default());
261        programs.add("pg3", Map::default())?;
262        let expected = Programs(
263            [
264                (BString::from("pg0"), Map::default()),
265                (
266                    BString::from("pg1"),
267                    Map::builder()
268                        .insert(tag::PREVIOUS_PROGRAM_ID, "pg0")
269                        .build()?,
270                ),
271                (BString::from("pg2"), Map::default()),
272                (
273                    BString::from("pg3"),
274                    Map::builder()
275                        .insert(tag::PREVIOUS_PROGRAM_ID, "pg2")
276                        .build()?,
277                ),
278                (
279                    BString::from("pg3-pg1"),
280                    Map::builder()
281                        .insert(tag::PREVIOUS_PROGRAM_ID, "pg1")
282                        .build()?,
283                ),
284            ]
285            .into_iter()
286            .collect(),
287        );
288        assert_eq!(programs, expected);
289
290        Ok(())
291    }
292
293    #[test]
294    fn test_leaves() -> Result<(), Box<dyn std::error::Error>> {
295        let header = Header::builder()
296            .add_program("pg0", Map::default())
297            .add_program("pg1", Map::default())
298            .add_program("pg2", Map::default())
299            .add_program(
300                "pg3",
301                Map::builder()
302                    .insert(tag::PREVIOUS_PROGRAM_ID, "pg0")
303                    .build()?,
304            )
305            .add_program(
306                "pg4",
307                Map::builder()
308                    .insert(tag::PREVIOUS_PROGRAM_ID, "pg1")
309                    .build()?,
310            )
311            .add_program(
312                "pg5",
313                Map::builder()
314                    .insert(tag::PREVIOUS_PROGRAM_ID, "pg4")
315                    .build()?,
316            )
317            .build();
318
319        let mut leaves = header.programs().leaves()?;
320        assert_eq!(leaves.next().map(|(id, _)| id.as_ref()), Some(&b"pg5"[..]));
321        assert_eq!(leaves.next().map(|(id, _)| id.as_ref()), Some(&b"pg3"[..]));
322        assert_eq!(leaves.next().map(|(id, _)| id.as_ref()), Some(&b"pg2"[..]));
323        assert!(leaves.next().is_none());
324
325        Ok(())
326    }
327
328    #[test]
329    fn test_leaves_with_cycle() -> Result<(), crate::header::record::value::map::builder::BuildError>
330    {
331        let header = Header::builder()
332            .add_program(
333                "pg0",
334                Map::builder()
335                    .insert(tag::PREVIOUS_PROGRAM_ID, "pg1")
336                    .build()?,
337            )
338            .add_program(
339                "pg1",
340                Map::builder()
341                    .insert(tag::PREVIOUS_PROGRAM_ID, "pg0")
342                    .build()?,
343            )
344            .build();
345
346        assert!(matches!(
347            header.programs().leaves(),
348            Err(e) if e.kind() == io::ErrorKind::InvalidData
349        ));
350
351        Ok(())
352    }
353}