noodles_cram/io/reader.rs
1//! CRAM reader and record iterator.
2
3mod builder;
4pub(crate) mod collections;
5pub(crate) mod container;
6pub mod header;
7pub(crate) mod num;
8mod query;
9mod records;
10
11use std::io::{self, Read, Seek, SeekFrom};
12
13use noodles_core::Region;
14use noodles_fasta as fasta;
15use noodles_sam as sam;
16
17pub use self::{builder::Builder, container::Container, query::Query, records::Records};
18use self::{container::read_container, header::read_header};
19use crate::{crai, FileDefinition};
20
21/// A CRAM reader.
22///
23/// The CRAM format is comprised of four main parts: 1) a file definition, 2) a file header, 3) a
24/// list of containers, and 4) an end-of-file (EOF) container.
25///
26/// # Examples
27///
28/// ```no_run
29/// # use std::{fs::File, io};
30/// use noodles_cram as cram;
31/// use noodles_fasta as fasta;
32///
33/// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
34/// let header = reader.read_header()?;
35///
36/// for result in reader.records(&header) {
37/// let record = result?;
38/// // ...
39/// }
40///
41/// # Ok::<_, io::Error>(())
42/// ```
43pub struct Reader<R> {
44 inner: R,
45 reference_sequence_repository: fasta::Repository,
46}
47
48impl<R> Reader<R> {
49 /// Returns a reference to the underlying reader.
50 ///
51 /// # Examples
52 ///
53 /// ```
54 /// # use std::io;
55 /// use noodles_cram as cram;
56 /// let reader = cram::io::Reader::new(io::empty());
57 /// let _inner = reader.get_ref();
58 /// ```
59 pub fn get_ref(&self) -> &R {
60 &self.inner
61 }
62
63 /// Returns a mutable reference to the underlying reader.
64 ///
65 /// # Examples
66 ///
67 /// ```
68 /// # use std::io;
69 /// use noodles_cram as cram;
70 /// let mut reader = cram::io::Reader::new(io::empty());
71 /// let _inner = reader.get_mut();
72 /// ```
73 pub fn get_mut(&mut self) -> &mut R {
74 &mut self.inner
75 }
76
77 /// Unwraps and returns the underlying reader.
78 ///
79 /// # Examples
80 ///
81 /// ```
82 /// # use std::io;
83 /// use noodles_cram as cram;
84 /// let reader = cram::io::Reader::new(io::empty());
85 /// let _inner = reader.into_inner();
86 /// ```
87 pub fn into_inner(self) -> R {
88 self.inner
89 }
90}
91
92impl<R> Reader<R>
93where
94 R: Read,
95{
96 /// Creates a CRAM reader.
97 ///
98 /// # Examples
99 ///
100 /// ```no_run
101 /// # use std::{fs::File, io};
102 /// use noodles_cram as cram;
103 /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
104 /// # Ok::<(), io::Error>(())
105 /// ```
106 pub fn new(inner: R) -> Self {
107 Builder::default().build_from_reader(inner)
108 }
109
110 pub(crate) fn reference_sequence_repository(&self) -> &fasta::Repository {
111 &self.reference_sequence_repository
112 }
113
114 /// Returns a CRAM header reader.
115 ///
116 /// # Examples
117 ///
118 /// ```no_run
119 /// # use std::{fs::File, io::Read};
120 /// use noodles_cram as cram;
121 ///
122 /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
123 ///
124 /// let mut header_reader = reader.header_reader();
125 /// header_reader.read_magic_number()?;
126 /// header_reader.read_format_version()?;
127 /// header_reader.read_file_id()?;
128 ///
129 /// let mut container_reader = header_reader.container_reader()?;
130 ///
131 /// let _raw_header = {
132 /// let mut raw_sam_header_reader = container_reader.raw_sam_header_reader()?;
133 /// let mut raw_header = String::new();
134 /// raw_sam_header_reader.read_to_string(&mut raw_header)?;
135 /// raw_sam_header_reader.discard_to_end()?;
136 /// raw_header
137 /// };
138 ///
139 /// container_reader.discard_to_end()?;
140 /// Ok::<_, std::io::Error>(())
141 /// ```
142 pub fn header_reader(&mut self) -> header::Reader<&mut R> {
143 header::Reader::new(&mut self.inner)
144 }
145
146 /// Reads the CRAM file definition.
147 ///
148 /// The CRAM magic number is also checked.
149 ///
150 /// The position of the stream is expected to be at the start.
151 ///
152 /// # Examples
153 ///
154 /// ```no_run
155 /// # use std::{fs::File, io};
156 /// use noodles_cram as cram;
157 /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
158 /// let file_definition = reader.read_file_definition()?;
159 /// # Ok::<(), io::Error>(())
160 /// ```
161 pub fn read_file_definition(&mut self) -> io::Result<FileDefinition> {
162 header::read_file_definition(&mut self.inner)
163 }
164
165 /// Reads the SAM header.
166 ///
167 /// The position of the stream is expected to be at the CRAM header container, i.e., directly
168 /// after the file definition.
169 ///
170 /// # Examples
171 ///
172 /// ```no_run
173 /// # use std::{fs::File, io};
174 /// use noodles_cram as cram;
175 ///
176 /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
177 /// reader.read_file_definition()?;
178 ///
179 /// let header = reader.read_file_header()?;
180 /// # Ok::<(), io::Error>(())
181 /// ```
182 pub fn read_file_header(&mut self) -> io::Result<sam::Header> {
183 header::read_file_header(&mut self.inner)
184 }
185
186 /// Reads the SAM header.
187 ///
188 /// This verifies the CRAM magic number, discards the file definition, and reads and parses the
189 /// file header as a SAM header.
190 ///
191 /// The position of the stream is expected to be at the start.
192 ///
193 /// # Examples
194 ///
195 /// ```no_run
196 /// # use std::{fs::File, io};
197 /// use noodles_cram as cram;
198 /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
199 /// let header = reader.read_header()?;
200 /// # Ok::<(), io::Error>(())
201 /// ```
202 pub fn read_header(&mut self) -> io::Result<sam::Header> {
203 read_header(&mut self.inner)
204 }
205
206 /// Reads a container.
207 ///
208 /// This returns `None` if the container header is the EOF container header, which signals the
209 /// end of the stream.
210 ///
211 /// # Examples
212 ///
213 /// ```no_run
214 /// # use std::{fs::File, io};
215 /// use noodles_cram::{self as cram, io::reader::Container};
216 ///
217 /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
218 /// reader.read_header()?;
219 ///
220 /// let mut container = Container::default();
221 ///
222 /// while reader.read_container(&mut container)? != 0 {
223 /// // ...
224 /// }
225 /// # Ok::<(), io::Error>(())
226 /// ```
227 pub fn read_container(&mut self, container: &mut Container) -> io::Result<usize> {
228 read_container(&mut self.inner, container)
229 }
230
231 /// Reads a container.
232 #[deprecated(since = "0.78.0", note = "Use `Reader::read_container` instead.")]
233 pub fn read_data_container(&mut self) -> io::Result<Option<Container>> {
234 let mut container = Container::default();
235
236 read_container(&mut self.inner, &mut container).map(|n| match n {
237 0 => None,
238 _ => Some(container),
239 })
240 }
241
242 /// Returns a iterator over records starting from the current stream position.
243 ///
244 /// The stream is expected to be at the start of a container.
245 ///
246 /// # Examples
247 ///
248 /// ```no_run
249 /// # use std::{fs::File, io};
250 /// use noodles_cram as cram;
251 /// use noodles_fasta as fasta;
252 ///
253 /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
254 /// let header = reader.read_header()?;
255 ///
256 /// for result in reader.records(&header) {
257 /// let record = result?;
258 /// // ...
259 /// }
260 /// # Ok::<_, io::Error>(())
261 /// ```
262 pub fn records<'r>(&'r mut self, header: &'r sam::Header) -> Records<'r, R> {
263 Records::new(self, header)
264 }
265}
266
267impl<R> Reader<R>
268where
269 R: Read + Seek,
270{
271 /// Seeks the underlying reader to the given position.
272 ///
273 /// Positions typically come from the associated CRAM index file.
274 ///
275 /// # Examples
276 ///
277 /// ```no_run
278 /// # use std::io::{self, SeekFrom};
279 /// use noodles_cram as cram;
280 /// let mut reader = cram::io::Reader::new(io::empty());
281 /// reader.seek(SeekFrom::Start(0))?;
282 /// # Ok::<(), io::Error>(())
283 /// ```
284 pub fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
285 self.inner.seek(pos)
286 }
287
288 /// Returns the current position of the underlying reader.
289 ///
290 /// # Examples
291 ///
292 /// ```
293 /// # use std::io;
294 /// use noodles_cram as cram;
295 /// let mut reader = cram::io::Reader::new(io::empty());
296 /// let position = reader.position()?;
297 /// assert_eq!(position, 0);
298 /// # Ok::<(), io::Error>(())
299 /// ```
300 pub fn position(&mut self) -> io::Result<u64> {
301 self.inner.stream_position()
302 }
303
304 /// Returns an iterator over records that intersects the given region.
305 ///
306 /// # Examples
307 ///
308 /// ```no_run
309 /// # use std::{fs::File, io};
310 /// use noodles_cram::{self as cram, crai};
311 /// use noodles_fasta as fasta;
312 ///
313 /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
314 ///
315 /// let header = reader.read_header()?;
316 /// let index = crai::fs::read("sample.cram.crai")?;
317 /// let region = "sq0:8-13".parse()?;
318 /// let query = reader.query(&header, &index, ®ion)?;
319 ///
320 /// for result in query {
321 /// let record = result?;
322 /// // ...
323 /// }
324 /// # Ok::<_, Box<dyn std::error::Error>>(())
325 /// ```
326 pub fn query<'a>(
327 &'a mut self,
328 header: &'a sam::Header,
329 index: &'a crai::Index,
330 region: &Region,
331 ) -> io::Result<Query<'a, R>> {
332 let reference_sequence_id = header
333 .reference_sequences()
334 .get_index_of(region.name())
335 .ok_or_else(|| {
336 io::Error::new(
337 io::ErrorKind::InvalidInput,
338 "invalid reference sequence name",
339 )
340 })?;
341
342 Ok(Query::new(
343 self,
344 header,
345 index,
346 reference_sequence_id,
347 region.interval(),
348 ))
349 }
350}
351
352impl<R> sam::alignment::io::Read<R> for Reader<R>
353where
354 R: Read,
355{
356 fn read_alignment_header(&mut self) -> io::Result<sam::Header> {
357 self.read_header()
358 }
359
360 fn alignment_records<'a>(
361 &'a mut self,
362 header: &'a sam::Header,
363 ) -> Box<dyn Iterator<Item = io::Result<Box<dyn sam::alignment::Record>>> + 'a> {
364 Box::new(
365 self.records(header).map(|result| {
366 result.map(|record| Box::new(record) as Box<dyn sam::alignment::Record>)
367 }),
368 )
369 }
370}
371
// TODO: Use `slice::split_at_checked` when the MSRV is raised to or above Rust 1.80.0.
//
// Splits `src` into `[0, mid)` and `[mid, len)`, or returns `None` when `mid` is past the end of
// the slice.
pub(crate) fn split_at_checked(src: &[u8], mid: usize) -> Option<(&[u8], &[u8])> {
    // Both lookups succeed exactly when `mid <= src.len()`.
    let head = src.get(..mid)?;
    let tail = src.get(mid..)?;
    Some((head, tail))
}
380
// TODO: Use `slice::split_first_chunk` when the MSRV is raised to or above Rust 1.77.0.
//
// Splits off the first `N` bytes of `src` as a fixed-size array reference, returning it with the
// remaining bytes, or `None` when `src` holds fewer than `N` bytes.
pub(crate) fn split_first_chunk<const N: usize>(src: &[u8]) -> Option<(&[u8; N], &[u8])> {
    // `get` yields `None` when fewer than `N` bytes are available.
    let head = src.get(..N)?;
    let tail = src.get(N..)?;

    // `head` is exactly `N` bytes long here, so the conversion is not expected to fail.
    let chunk = <&[u8; N]>::try_from(head).ok()?;

    Some((chunk, tail))
}
390}