// noodles_cram/lib.rs

1#![warn(missing_docs)]
2
3//! **noodles-cram** handles the reading and writing of the CRAM format.
4
5#[cfg(feature = "async")]
6pub mod r#async;
7
8pub mod codecs;
9pub mod container;
10pub mod crai;
11pub mod file_definition;
12pub mod fs;
13mod huffman;
14pub mod io;
15pub mod record;
16
17pub use self::{file_definition::FileDefinition, record::Record};
18
19#[deprecated(since = "0.78.0", note = "Use `cram::container` instead.")]
20pub use self::container as data_container;
21
22#[deprecated(since = "0.78.0", note = "Use `cram::io::reader::Container` instead.")]
23pub use self::io::reader::Container;
24
25#[deprecated(since = "0.78.0", note = "Use `cram::io::reader::Container` instead.")]
26pub use self::io::reader::Container as DataContainer;
27
28#[deprecated(since = "0.76.0", note = "Use `cram::fs::index` instead.")]
29pub use self::fs::index;
30
31#[cfg(feature = "async")]
32#[deprecated(since = "0.69.0", note = "Use `cram::r#async::io::Reader` instead.")]
33pub use self::r#async::io::Reader as AsyncReader;
34
35#[cfg(feature = "async")]
36#[deprecated(since = "0.69.0", note = "Use `cram::r#async::io::Writer` instead.")]
37pub use self::r#async::io::Writer as AsyncWriter;
38
39const MAGIC_NUMBER: [u8; 4] = *b"CRAM";
40
41// _Sequence Alignment/Map Format Specification_ (2021-06-03) § 1.3.2 "Reference MD5 calculation"
42fn calculate_normalized_sequence_digest(sequence: &[u8]) -> [u8; 16] {
43    use md5::{Digest, Md5};
44
45    let mut hasher = Md5::new();
46
47    for &b in sequence {
48        // "All characters outside of the inclusive range 33 ('!') to 126 ('~') are stripped out."
49        if b.is_ascii_graphic() {
50            // "All lowercase characters are converted to uppercase."
51            hasher.update([b.to_ascii_uppercase()]);
52        }
53    }
54
55    hasher.finalize().into()
56}
57
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_calculate_normalized_sequence_digest() {
        // Expected digest for the normalized sequence "ACGT".
        const ACGT_DIGEST: [u8; 16] = [
            0xf1, 0xf8, 0xf4, 0xbf, 0x41, 0x3b, 0x16, 0xad, 0x13, 0x57, 0x22, 0xaa, 0x45, 0x91,
            0x04, 0x3e,
        ];

        // _Sequence Alignment/Map Format Specification_ (2021-06-03) § 1.3.2 "Reference MD5
        // calculation"
        const SPEC_EXAMPLE_DIGEST: [u8; 16] = [
            0xdf, 0xab, 0xdb, 0xb3, 0x6e, 0x23, 0x9a, 0x6d, 0xa8, 0x89, 0x57, 0x84, 0x1f, 0x32,
            0xb8, 0xe4,
        ];

        // Each case pairs a raw input sequence with the digest expected after normalization.
        let cases: [(&[u8], [u8; 16]); 3] = [
            // Already normalized.
            (&b"ACGT"[..], ACGT_DIGEST),
            // Lowercase bases must hash the same as their uppercase forms.
            (&b"ACgt"[..], ACGT_DIGEST),
            // The worked example from the specification.
            (
                &b"ACGTACGTACGTACGTACGTACGT...12345!!!"[..],
                SPEC_EXAMPLE_DIGEST,
            ),
        ];

        for &(sequence, expected) in &cases {
            assert_eq!(
                calculate_normalized_sequence_digest(sequence),
                expected,
                "sequence: {:?}",
                sequence
            );
        }
    }
}