noodles_sam/
header.rs

1//! SAM header.
2//!
3//! A SAM header is a list of header records. There are four map-like record types ([header]
4//! (`HD`), [reference sequence] (`SQ`), [read group] (`RG`), and [program] (`PG`)) and one string
5//! type (comment (`CO`)).
6//!
7//! Records are optional, which means an empty header is considered a valid SAM header.
8//!
9//! If there is a header (`HD`) record, it must appear as the first record.
10//!
11//! Reference sequence, read group, and program records are maps of records with unique
//! identifiers. Comments are a list of strings. Reference sequences must be ordered, whereas read
//! groups, programs, and comments can be unordered. (`sam::Header` defines them to be ordered.)
14//!
15//! [header]: `record::value::map::Header`
16//! [reference sequence]: `ReferenceSequence`
17//! [read group]: `ReadGroup`
18//! [program]: `record::value::map::Program`
19//!
20//! # Examples
21//!
22//! ## Parse a SAM header
23//!
24//! ```
25//! use noodles_sam as sam;
26//!
27//! let s = "\
28//! @HD\tVN:1.6\tSO:coordinate
29//! @SQ\tSN:sq0\tLN:8
30//! @SQ\tSN:sq1\tLN:13
31//! ";
32//!
33//! let header: sam::Header = s.parse()?;
34//!
35//! assert!(header.header().is_some());
36//! assert_eq!(header.reference_sequences().len(), 2);
37//! assert!(header.read_groups().is_empty());
38//! assert!(header.programs().as_ref().is_empty());
39//! assert!(header.comments().is_empty());
40//! # Ok::<(), sam::header::ParseError>(())
41//! ```
42//!
43//! ## Create a SAM header
44//!
45//! ```
46//! use std::num::NonZeroUsize;
47//!
48//! use noodles_sam::{
49//!     self as sam,
50//!     header::record::value::{map::ReferenceSequence, Map},
51//! };
52//!
53//! let header = sam::Header::builder()
54//!     .set_header(Default::default())
55//!     .add_reference_sequence(
56//!         "sq0",
57//!         Map::<ReferenceSequence>::new(NonZeroUsize::try_from(8)?),
58//!     )
59//!     .add_reference_sequence(
60//!         "sq1",
61//!         Map::<ReferenceSequence>::new(NonZeroUsize::try_from(13)?),
62//!     )
63//!     .build();
64//!
65//! assert!(header.header().is_some());
66//! assert_eq!(header.reference_sequences().len(), 2);
67//! assert!(header.read_groups().is_empty());
68//! assert!(header.programs().as_ref().is_empty());
69//! assert!(header.comments().is_empty());
70//! # Ok::<(), Box<dyn std::error::Error>>(())
71//! ```
72
73mod builder;
74mod parser;
75mod programs;
76pub mod record;
77
78pub use self::{
79    builder::Builder,
80    parser::{ParseError, Parser},
81    record::Record,
82};
83
84use std::str::{self, FromStr};
85
86use bstr::BString;
87use indexmap::IndexMap;
88
89pub use self::programs::Programs;
90use self::record::value::{
91    map::{self, ReadGroup, ReferenceSequence},
92    Map,
93};
94
/// A reference sequence dictionary.
///
/// This is an [`IndexMap`] from a byte-string key to a reference sequence (`SQ`) record map. In
/// the examples in this module, the key is the reference sequence name (e.g., `sq0`).
pub type ReferenceSequences = IndexMap<BString, Map<ReferenceSequence>>;
97
/// An ordered map of read groups.
///
/// This is an [`IndexMap`] from a byte-string key to a read group (`RG`) record map. In the
/// examples in this module, the key is the read group's identifier (e.g., `rg0`).
pub type ReadGroups = IndexMap<BString, Map<ReadGroup>>;
100
/// A SAM header.
///
/// Records are grouped by their types: header, reference sequence, read group, program, and
/// comment.
///
/// The default value is a header without any records, i.e., [`Self::is_empty`] returns `true`
/// for it.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct Header {
    // The header (`HD`) record; `None` when the SAM header has no such record.
    header: Option<Map<map::Header>>,
    // Reference sequence (`SQ`) records, keyed by a byte-string identifier.
    reference_sequences: ReferenceSequences,
    // Read group (`RG`) records, keyed by a byte-string identifier.
    read_groups: ReadGroups,
    // Program (`PG`) records.
    programs: Programs,
    // Comment (`CO`) records, in the order they were added.
    comments: Vec<BString>,
}
113
114impl Header {
115    /// Returns a builder to create a SAM header.
116    ///
117    /// # Examples
118    ///
119    /// ```
120    /// use noodles_sam as sam;
121    /// let builder = sam::Header::builder();
122    /// ```
123    pub fn builder() -> Builder {
124        Builder::default()
125    }
126
127    /// Returns the SAM header header if it is set.
128    ///
129    /// # Examples
130    ///
131    /// ```
132    /// use noodles_sam::{
133    ///     self as sam,
134    ///     header::record::value::{map::Header, Map},
135    /// };
136    ///
137    /// let header = sam::Header::default();
138    /// assert!(header.header().is_none());
139    ///
140    /// let header = sam::Header::builder()
141    ///     .set_header(Map::<Header>::default())
142    ///     .build();
143    ///
144    /// assert!(header.header().is_some());
145    /// ```
146    pub fn header(&self) -> Option<&Map<map::Header>> {
147        self.header.as_ref()
148    }
149
150    /// Returns a mutable reference to the SAM header header if it is set.
151    ///
152    /// # Examples
153    ///
154    /// ```
155    /// use noodles_sam::{self as sam, header::record::value::{map, Map}};
156    ///
157    /// let mut header = sam::Header::default();
158    /// assert!(header.header().is_none());
159    ///
160    /// let hd = Map::<map::Header>::default();
161    /// *header.header_mut() = Some(hd.clone());
162    /// assert_eq!(header.header(), Some(&hd));
163    /// ```
164    pub fn header_mut(&mut self) -> &mut Option<Map<map::Header>> {
165        &mut self.header
166    }
167
168    /// Returns the SAM header reference sequences.
169    ///
170    /// This is also called the reference sequence dictionary.
171    ///
172    /// # Examples
173    ///
174    /// ```
175    /// use std::num::NonZeroUsize;
176    ///
177    /// use noodles_sam::{
178    ///     self as sam,
179    ///     header::record::value::{map::ReferenceSequence, Map},
180    /// };
181    ///
182    /// let header = sam::Header::builder()
183    ///     .add_reference_sequence(
184    ///         "sq0",
185    ///         Map::<ReferenceSequence>::new(NonZeroUsize::try_from(13)?)
186    ///     )
187    ///     .build();
188    ///
189    /// let reference_sequences = header.reference_sequences();
190    /// assert_eq!(reference_sequences.len(), 1);
191    /// assert!(reference_sequences.contains_key(&b"sq0"[..]));
192    /// # Ok::<(), Box<dyn std::error::Error>>(())
193    /// ```
194    pub fn reference_sequences(&self) -> &ReferenceSequences {
195        &self.reference_sequences
196    }
197
198    /// Returns a mutable reference to the SAM header reference sequences.
199    ///
200    /// This is also called the reference sequence dictionary.
201    ///
202    /// # Examples
203    ///
204    /// ```
205    /// use std::num::NonZeroUsize;
206    ///
207    /// use noodles_sam::{
208    ///     self as sam,
209    ///     header::record::value::{map::ReferenceSequence, Map},
210    /// };
211    ///
212    /// let mut header = sam::Header::default();
213    ///
214    /// header.reference_sequences_mut().insert(
215    ///     String::from("sq0").into(),
216    ///     Map::<ReferenceSequence>::new(NonZeroUsize::try_from(13)?),
217    /// );
218    ///
219    /// let reference_sequences = header.reference_sequences();
220    /// assert_eq!(reference_sequences.len(), 1);
221    /// assert!(reference_sequences.contains_key(&b"sq0"[..]));
222    /// # Ok::<(), Box<dyn std::error::Error>>(())
223    /// ```
224    pub fn reference_sequences_mut(&mut self) -> &mut ReferenceSequences {
225        &mut self.reference_sequences
226    }
227
228    /// Returns the SAM header read groups.
229    ///
230    /// # Examples
231    ///
232    /// ```
233    /// use noodles_sam::{
234    ///     self as sam,
235    ///     header::record::value::{map::ReadGroup, Map},
236    /// };
237    ///
238    /// let header = sam::Header::builder()
239    ///     .add_read_group("rg0", Map::<ReadGroup>::default())
240    ///     .build();
241    ///
242    /// let read_groups = header.read_groups();
243    /// assert_eq!(read_groups.len(), 1);
244    /// assert!(read_groups.contains_key(&b"rg0"[..]));
245    /// ```
246    pub fn read_groups(&self) -> &ReadGroups {
247        &self.read_groups
248    }
249
250    /// Returns a mutable reference to the SAM header read groups.
251    ///
252    /// # Examples
253    ///
254    /// ```
255    /// use noodles_sam::{
256    ///     self as sam,
257    ///     header::record::value::{map::ReadGroup, Map},
258    /// };
259    ///
260    /// let mut header = sam::Header::default();
261    /// assert!(header.read_groups().is_empty());
262    ///
263    /// let read_group = Map::<ReadGroup>::default();
264    /// header.read_groups_mut().insert(String::from("rg0").into(), read_group);
265    ///
266    /// let read_groups = header.read_groups();
267    /// assert_eq!(read_groups.len(), 1);
268    /// assert!(read_groups.contains_key(&b"rg0"[..]));
269    /// ```
270    pub fn read_groups_mut(&mut self) -> &mut ReadGroups {
271        &mut self.read_groups
272    }
273
274    /// Returns the SAM header programs.
275    ///
276    /// # Examples
277    ///
278    /// ```
279    /// use noodles_sam::{self as sam, header::record::value::{map::Program, Map}};
280    ///
281    /// let program = Map::<Program>::default();
282    /// let header = sam::Header::builder().add_program("noodles-sam", program).build();
283    ///
284    /// let programs = header.programs();
285    /// assert_eq!(programs.as_ref().len(), 1);
286    /// assert!(programs.as_ref().contains_key(&b"noodles-sam"[..]));
287    /// ```
288    pub fn programs(&self) -> &Programs {
289        &self.programs
290    }
291
292    /// Returns a mutable reference to the SAM header programs.
293    ///
294    /// # Examples
295    ///
296    /// ```
297    /// use noodles_sam::{self as sam, header::record::value::{map::Program, Map}};
298    ///
299    /// let mut header = sam::Header::default();
300    ///
301    /// let program = Map::<Program>::default();
302    /// header
303    ///     .programs_mut()
304    ///     .as_mut()
305    ///     .insert(String::from("noodles-sam").into(), program);
306    ///
307    /// let programs = header.programs();
308    /// assert_eq!(programs.as_ref().len(), 1);
309    /// assert!(programs.as_ref().contains_key(&b"noodles-sam"[..]));
310    /// ```
311    pub fn programs_mut(&mut self) -> &mut Programs {
312        &mut self.programs
313    }
314
315    /// Returns the SAM header comments.
316    ///
317    /// # Examples
318    ///
319    /// ```
320    /// use bstr::BString;
321    /// use noodles_sam as sam;
322    /// let header = sam::Header::builder().add_comment("noodles-sam").build();
323    /// let comments = header.comments();
324    /// assert_eq!(header.comments(), [BString::from("noodles-sam")]);
325    /// ```
326    pub fn comments(&self) -> &[BString] {
327        &self.comments
328    }
329
330    /// Returns a mutable reference to the SAM header comments.
331    ///
332    /// To simply append a comment record, consider using [`Self::add_comment`] instead.
333    ///
334    /// # Examples
335    ///
336    /// ```
337    /// use bstr::BString;
338    /// use noodles_sam as sam;
339    /// let mut header = sam::Header::default();
340    /// header.comments_mut().push(BString::from("noodles-sam"));
341    /// assert_eq!(header.comments(), [BString::from("noodles-sam")]);
342    /// ```
343    pub fn comments_mut(&mut self) -> &mut Vec<BString> {
344        &mut self.comments
345    }
346
347    /// Adds a comment.
348    ///
349    /// # Examples
350    ///
351    /// ```
352    /// use bstr::BString;
353    /// use noodles_sam as sam;
354    /// let mut header = sam::Header::default();
355    /// header.add_comment("noodles-sam");
356    /// assert_eq!(header.comments(), [BString::from("noodles-sam")]);
357    /// ```
358    pub fn add_comment<C>(&mut self, comment: C)
359    where
360        C: Into<BString>,
361    {
362        self.comments.push(comment.into());
363    }
364
365    /// Returns whether there are no records in this SAM header.
366    ///
367    /// # Examples
368    ///
369    /// ```
370    /// use noodles_sam as sam;
371    ///
372    /// let header = sam::Header::default();
373    /// assert!(header.is_empty());
374    ///
375    /// let header = sam::Header::builder().add_comment("noodles-sam").build();
376    /// assert!(!header.is_empty());
377    /// ```
378    pub fn is_empty(&self) -> bool {
379        self.header.is_none()
380            && self.reference_sequences.is_empty()
381            && self.read_groups.is_empty()
382            && self.programs.as_ref().is_empty()
383            && self.comments.is_empty()
384    }
385
386    /// Removes all records from the header.
387    ///
388    /// # Examples
389    ///
390    /// ```
391    /// use noodles_sam as sam;
392    ///
393    /// let mut header = sam::Header::builder().add_comment("ndls").build();
394    /// assert!(!header.is_empty());
395    ///
396    /// header.clear();
397    /// assert!(header.is_empty());
398    /// ```
399    pub fn clear(&mut self) {
400        self.header.take();
401        self.reference_sequences.clear();
402        self.read_groups.clear();
403        self.programs.as_mut().clear();
404        self.comments.clear();
405    }
406}
407
408impl FromStr for Header {
409    type Err = ParseError;
410
411    /// Parses a raw SAM header.
412    ///
413    /// # Examples
414    ///
415    /// ```
416    /// use noodles_sam as sam;
417    ///
418    /// let s = "\
419    /// @HD\tVN:1.6\tSO:coordinate
420    /// @SQ\tSN:sq0\tLN:8
421    /// @SQ\tSN:sq1\tLN:13
422    /// ";
423    ///
424    /// let header: sam::Header = s.parse()?;
425    ///
426    /// assert!(header.header().is_some());
427    /// assert_eq!(header.reference_sequences().len(), 2);
428    /// assert!(header.read_groups().is_empty());
429    /// assert!(header.programs().as_ref().is_empty());
430    /// assert!(header.comments().is_empty());
431    /// # Ok::<(), sam::header::ParseError>(())
432    /// ```
433    fn from_str(s: &str) -> Result<Self, Self::Err> {
434        parser::parse(s)
435    }
436}