noodles_sam/header/programs.rs
1use std::io;
2
3use bstr::{BStr, BString, ByteVec};
4use indexmap::{IndexMap, IndexSet};
5
6use super::record::value::map::{program::tag, Map, Program};
7
8type Inner = IndexMap<BString, Map<Program>>;
9
10#[allow(clippy::tabs_in_doc_comments)]
11/// SAM header programs.
12///
13/// SAM header programs are header records that form program chains. A program chain is a directed
14/// acyclic graph (DAG) starting at a root program and ending at a leaf program. Program edges are
15/// directed forward from a parent program to its linked program using the previous program ID
16/// (`PP`) field in the child program.
17///
18/// For example, take the following program records:
19///
20/// ```text
21/// @PG ID:pg0
22/// @PG ID:pg1 PP:pg0
23/// ```
24///
25/// This creates the program chain `pg0 -> pg1`.
26///
27/// There can exist more than one program chain in the SAM header programs, e.g.,
28///
29/// ```text
30/// @PG ID:pg0
31/// @PG ID:pg1
32/// @PG ID:pg2
33/// @PG ID:pg3 PP:pg0
34/// @PG ID:pg4 PP:pg1
35/// @PG ID:pg5 PP:pg4
36/// ```
37///
38/// This creates the program chains `pg0 -> pg3`, `pg1 -> pg4 -> pg5`, and `pg2`.
39#[derive(Clone, Debug, Default, Eq, PartialEq)]
40pub struct Programs(Inner);
41
42impl Programs {
43 /// Adds a program.
44 ///
45 /// If the program is the first program in the graph, this is similar to calling
46 /// `IndexMap::insert` on the inner graph. If no previous program is set, this attaches the
47 /// program to all program chains using leaf programs as the given program's previous program.
48 ///
49 /// # Examples
50 ///
51 /// ```
52 /// use noodles_sam::{
53 /// self as sam,
54 /// header::record::value::{map::{program::tag, Program}, Map},
55 /// };
56 ///
57 /// let mut header = sam::Header::default();
58 /// let programs = header.programs_mut();
59 ///
60 /// programs.add("pg0", Map::default())?;
61 /// programs.add("pg1", Map::default())?;
62 ///
63 /// let expected = sam::Header::builder()
64 /// .add_program("pg0", Map::default())
65 /// .add_program("pg1", Map::builder().insert(tag::PREVIOUS_PROGRAM_ID, "pg0").build()?)
66 /// .build();
67 ///
68 /// assert_eq!(programs, expected.programs());
69 /// # Ok::<_, Box<dyn std::error::Error>>(())
70 /// ```
71 pub fn add<P>(&mut self, id_prefix: P, map: Map<Program>) -> io::Result<()>
72 where
73 P: Into<BString>,
74 {
75 const SEPARATOR: u8 = b'-';
76
77 let id_prefix = id_prefix.into();
78
79 if self.0.is_empty() {
80 self.0.insert(id_prefix, map);
81 return Ok(());
82 }
83
84 let previous_program_ids: Vec<BString> = self.leaves()?.map(|(id, _)| id.into()).collect();
85 let contains_prefix_id = self.0.contains_key(&id_prefix);
86
87 for (i, previous_program_id) in previous_program_ids.into_iter().enumerate() {
88 let mut id = id_prefix.clone();
89
90 if i > 0 || contains_prefix_id {
91 id.push_byte(SEPARATOR);
92 id.push_str(&previous_program_id);
93
94 if self.0.contains_key(&id) {
95 return Err(io::Error::new(io::ErrorKind::InvalidInput, "duplicate ID"));
96 }
97 }
98
99 let mut map = map.clone();
100
101 map.other_fields_mut()
102 .entry(tag::PREVIOUS_PROGRAM_ID)
103 .or_insert(previous_program_id);
104
105 self.0.insert(id, map);
106 }
107
108 Ok(())
109 }
110
111 /// Returns an iterator over root programs.
112 ///
113 /// A root program is a first program of a program chain.
114 ///
115 /// # Examples
116 ///
117 /// ```
118 /// use noodles_sam::{
119 /// self as sam,
120 /// header::record::value::{map::{program::tag, Program}, Map},
121 /// };
122 ///
123 /// let header = sam::Header::builder()
124 /// .add_program("pg0", Map::default())
125 /// .add_program("pg1", Map::builder().insert(tag::PREVIOUS_PROGRAM_ID, "pg0").build()?)
126 /// .add_program("pg2", Map::builder().insert(tag::PREVIOUS_PROGRAM_ID, "pg1").build()?)
127 /// .add_program("pg3", Map::default())
128 /// .build();
129 ///
130 /// let mut roots = header.programs().roots();
131 /// assert_eq!(roots.next().map(|(id, _)| id.as_ref()), Some(&b"pg0"[..]));
132 /// assert_eq!(roots.next().map(|(id, _)| id.as_ref()), Some(&b"pg3"[..]));
133 /// assert!(roots.next().is_none());
134 /// # Ok::<_, sam::header::record::value::map::builder::BuildError>(())
135 /// ```
136 pub fn roots(&self) -> impl Iterator<Item = (&BStr, &Map<Program>)> {
137 self.0
138 .iter()
139 .filter(|(_, map)| !map.other_fields().contains_key(&tag::PREVIOUS_PROGRAM_ID))
140 .map(|(id, map)| (id.as_ref(), map))
141 }
142
143 /// Returns an iterator over leaf programs.
144 ///
145 /// A leaf program is the last program of a program chain.
146 ///
147 /// # Errors
148 ///
149 /// This returns an `io::Error` if any program chain has a cycle.
150 ///
151 /// # Examples
152 ///
153 /// ```
154 /// use noodles_sam::{
155 /// self as sam,
156 /// header::record::value::{map::{program::tag, Program}, Map},
157 /// };
158 ///
159 /// let header = sam::Header::builder()
160 /// .add_program("pg0", Map::default())
161 /// .add_program("pg1", Map::builder().insert(tag::PREVIOUS_PROGRAM_ID, "pg0").build()?)
162 /// .add_program("pg2", Map::builder().insert(tag::PREVIOUS_PROGRAM_ID, "pg1").build()?)
163 /// .add_program("pg3", Map::default())
164 /// .build();
165 ///
166 /// let mut leaves = header.programs().leaves()?;
167 /// assert_eq!(leaves.next().map(|(id, _)| id.as_ref()), Some(&b"pg3"[..]));
168 /// assert_eq!(leaves.next().map(|(id, _)| id.as_ref()), Some(&b"pg2"[..]));
169 /// assert!(leaves.next().is_none());
170 /// # Ok::<_, Box<dyn std::error::Error>>(())
171 /// ```
172 pub fn leaves(&self) -> io::Result<impl Iterator<Item = (&BStr, &Map<Program>)>> {
173 let mut ids: IndexSet<_> = self.0.keys().collect();
174
175 for (id, map) in &self.0 {
176 if let Some(previous_program_id) = map.other_fields().get(&tag::PREVIOUS_PROGRAM_ID) {
177 if has_cycle(&self.0, previous_program_id.as_ref(), id.as_ref()) {
178 return Err(io::Error::new(io::ErrorKind::InvalidData, "cycle detected"));
179 }
180
181 ids.swap_remove(previous_program_id);
182 }
183 }
184
185 Ok(ids.into_iter().map(|id| {
186 // SAFETY: `id` is guaranteed to be in the set of keys.
187 self.0
188 .get_key_value(id)
189 .map(|(i, map)| (i.as_ref(), map))
190 .unwrap()
191 }))
192 }
193}
194
195fn has_cycle<'a>(graph: &'a Inner, mut parent_id: &'a BStr, node_id: &'a BStr) -> bool {
196 loop {
197 // SAFETY: `parent_id` is guaranteed to be in the graph.
198 let node = &graph[parent_id];
199
200 if let Some(previous_program_id) = node.other_fields().get(&tag::PREVIOUS_PROGRAM_ID) {
201 parent_id = previous_program_id.as_ref();
202
203 if parent_id == node_id {
204 return true;
205 }
206 } else {
207 return false;
208 }
209 }
210}
211
212impl AsRef<Inner> for Programs {
213 fn as_ref(&self) -> &Inner {
214 &self.0
215 }
216}
217
218impl AsMut<Inner> for Programs {
219 fn as_mut(&mut self) -> &mut Inner {
220 &mut self.0
221 }
222}
223
224#[cfg(test)]
225mod tests {
226 use super::*;
227 use crate::Header;
228
229 #[test]
230 fn test_add() -> Result<(), Box<dyn std::error::Error>> {
231 let mut programs = Programs::default();
232 assert!(programs.as_ref().is_empty());
233
234 programs.add("pg0", Map::default())?;
235 let expected = Programs(
236 [(BString::from("pg0"), Map::default())]
237 .into_iter()
238 .collect(),
239 );
240 assert_eq!(programs, expected);
241
242 programs.add("pg1", Map::default())?;
243 let expected = Programs(
244 [
245 (BString::from("pg0"), Map::default()),
246 (
247 BString::from("pg1"),
248 Map::builder()
249 .insert(tag::PREVIOUS_PROGRAM_ID, "pg0")
250 .build()?,
251 ),
252 ]
253 .into_iter()
254 .collect(),
255 );
256 assert_eq!(programs, expected);
257
258 programs
259 .as_mut()
260 .insert(BString::from("pg2"), Map::default());
261 programs.add("pg3", Map::default())?;
262 let expected = Programs(
263 [
264 (BString::from("pg0"), Map::default()),
265 (
266 BString::from("pg1"),
267 Map::builder()
268 .insert(tag::PREVIOUS_PROGRAM_ID, "pg0")
269 .build()?,
270 ),
271 (BString::from("pg2"), Map::default()),
272 (
273 BString::from("pg3"),
274 Map::builder()
275 .insert(tag::PREVIOUS_PROGRAM_ID, "pg2")
276 .build()?,
277 ),
278 (
279 BString::from("pg3-pg1"),
280 Map::builder()
281 .insert(tag::PREVIOUS_PROGRAM_ID, "pg1")
282 .build()?,
283 ),
284 ]
285 .into_iter()
286 .collect(),
287 );
288 assert_eq!(programs, expected);
289
290 Ok(())
291 }
292
293 #[test]
294 fn test_leaves() -> Result<(), Box<dyn std::error::Error>> {
295 let header = Header::builder()
296 .add_program("pg0", Map::default())
297 .add_program("pg1", Map::default())
298 .add_program("pg2", Map::default())
299 .add_program(
300 "pg3",
301 Map::builder()
302 .insert(tag::PREVIOUS_PROGRAM_ID, "pg0")
303 .build()?,
304 )
305 .add_program(
306 "pg4",
307 Map::builder()
308 .insert(tag::PREVIOUS_PROGRAM_ID, "pg1")
309 .build()?,
310 )
311 .add_program(
312 "pg5",
313 Map::builder()
314 .insert(tag::PREVIOUS_PROGRAM_ID, "pg4")
315 .build()?,
316 )
317 .build();
318
319 let mut leaves = header.programs().leaves()?;
320 assert_eq!(leaves.next().map(|(id, _)| id.as_ref()), Some(&b"pg5"[..]));
321 assert_eq!(leaves.next().map(|(id, _)| id.as_ref()), Some(&b"pg3"[..]));
322 assert_eq!(leaves.next().map(|(id, _)| id.as_ref()), Some(&b"pg2"[..]));
323 assert!(leaves.next().is_none());
324
325 Ok(())
326 }
327
328 #[test]
329 fn test_leaves_with_cycle() -> Result<(), crate::header::record::value::map::builder::BuildError>
330 {
331 let header = Header::builder()
332 .add_program(
333 "pg0",
334 Map::builder()
335 .insert(tag::PREVIOUS_PROGRAM_ID, "pg1")
336 .build()?,
337 )
338 .add_program(
339 "pg1",
340 Map::builder()
341 .insert(tag::PREVIOUS_PROGRAM_ID, "pg0")
342 .build()?,
343 )
344 .build();
345
346 assert!(matches!(
347 header.programs().leaves(),
348 Err(e) if e.kind() == io::ErrorKind::InvalidData
349 ));
350
351 Ok(())
352 }
353}