1#[cfg(feature = "async")]
4pub mod r#async;
5
6pub mod codecs;
7pub mod container;
8pub mod crai;
9pub mod file_definition;
10pub mod fs;
11mod huffman;
12pub mod io;
13pub mod record;
14
15use md5::{Digest, Md5};
16
17pub use self::{file_definition::FileDefinition, record::Record};
18
19#[deprecated(since = "0.78.0", note = "Use `cram::container` instead.")]
20pub use self::container as data_container;
21
22#[deprecated(since = "0.78.0", note = "Use `cram::io::reader::Container` instead.")]
23pub use self::io::reader::Container;
24
25#[deprecated(since = "0.78.0", note = "Use `cram::io::reader::Container` instead.")]
26pub use self::io::reader::Container as DataContainer;
27
28#[deprecated(since = "0.76.0", note = "Use `cram::fs::index` instead.")]
29pub use self::fs::index;
30
31#[cfg(feature = "async")]
32#[deprecated(since = "0.69.0", note = "Use `cram::r#async::io::Reader` instead.")]
33pub use self::r#async::io::Reader as AsyncReader;
34
35#[cfg(feature = "async")]
36#[deprecated(since = "0.69.0", note = "Use `cram::r#async::io::Writer` instead.")]
37pub use self::r#async::io::Writer as AsyncWriter;
38
39const MAGIC_NUMBER: [u8; 4] = *b"CRAM";
40const MD5_OUTPUT_SIZE: usize = 16;
41
42fn calculate_normalized_sequence_digest(mut sequence: &[u8]) -> [u8; MD5_OUTPUT_SIZE] {
44 const MD5_BLOCK_SIZE: usize = 64;
45 const CHUNK_SIZE: usize = 8 * MD5_BLOCK_SIZE;
46
47 let mut hasher = Md5::new();
48 let mut buf = [0; CHUNK_SIZE];
49
50 while !sequence.is_empty() {
51 let n = sequence
52 .iter()
53 .position(|b| !b.is_ascii_graphic() || b.is_ascii_lowercase())
54 .unwrap_or(sequence.len());
55
56 hasher.update(&sequence[..n]);
57 sequence = &sequence[n..];
58
59 loop {
61 let mut n = 0;
62
63 for (src, dst) in sequence
64 .iter()
65 .take_while(|b| b.is_ascii_lowercase())
66 .zip(&mut buf)
67 {
68 *dst = src.to_ascii_uppercase();
69 n += 1;
70 }
71
72 if n == 0 {
73 break;
74 }
75
76 hasher.update(&buf[..n]);
77 sequence = &sequence[n..];
78 }
79
80 let n = sequence
82 .iter()
83 .position(|b| b.is_ascii_graphic())
84 .unwrap_or(sequence.len());
85
86 sequence = &sequence[n..];
87 }
88
89 hasher.finalize().into()
90}
91
92#[cfg(test)]
93mod tests {
94 use super::*;
95
96 #[test]
97 fn test_calculate_normalized_sequence_digest() {
98 assert_eq!(
99 calculate_normalized_sequence_digest(b"ACGT"),
100 [
101 0xf1, 0xf8, 0xf4, 0xbf, 0x41, 0x3b, 0x16, 0xad, 0x13, 0x57, 0x22, 0xaa, 0x45, 0x91,
102 0x04, 0x3e
103 ]
104 );
105
106 assert_eq!(
107 calculate_normalized_sequence_digest(b" AC\tgt\n"),
108 [
109 0xf1, 0xf8, 0xf4, 0xbf, 0x41, 0x3b, 0x16, 0xad, 0x13, 0x57, 0x22, 0xaa, 0x45, 0x91,
110 0x04, 0x3e
111 ]
112 );
113
114 assert_eq!(
117 calculate_normalized_sequence_digest(b"ACGT ACGT ACGT\nacgt acgt acgt\n... 12345 !!!"),
118 [
119 0xdf, 0xab, 0xdb, 0xb3, 0x6e, 0x23, 0x9a, 0x6d, 0xa8, 0x89, 0x57, 0x84, 0x1f, 0x32,
120 0xb8, 0xe4
121 ]
122 );
123 }
124}