noodles_cram/io/
reader.rs

1//! CRAM reader and record iterator.
2
3mod builder;
4pub(crate) mod collections;
5pub(crate) mod container;
6pub mod header;
7pub(crate) mod num;
8mod query;
9mod records;
10
11use std::io::{self, Read, Seek, SeekFrom};
12
13use noodles_core::Region;
14use noodles_fasta as fasta;
15use noodles_sam as sam;
16
17pub use self::{builder::Builder, container::Container, query::Query, records::Records};
18use self::{container::read_container, header::read_header};
19use crate::{crai, FileDefinition};
20
21/// A CRAM reader.
22///
23/// The CRAM format is comprised of four main parts: 1) a file definition, 2) a file header, 3) a
24/// list of containers, and 4) an end-of-file (EOF) container.
25///
26/// # Examples
27///
28/// ```no_run
29/// # use std::{fs::File, io};
30/// use noodles_cram as cram;
31/// use noodles_fasta as fasta;
32///
33/// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
34/// let header = reader.read_header()?;
35///
36/// for result in reader.records(&header) {
37///     let record = result?;
38///     // ...
39/// }
40///
41/// # Ok::<_, io::Error>(())
42/// ```
43pub struct Reader<R> {
44    inner: R,
45    reference_sequence_repository: fasta::Repository,
46}
47
48impl<R> Reader<R> {
49    /// Returns a reference to the underlying reader.
50    ///
51    /// # Examples
52    ///
53    /// ```
54    /// # use std::io;
55    /// use noodles_cram as cram;
56    /// let reader = cram::io::Reader::new(io::empty());
57    /// let _inner = reader.get_ref();
58    /// ```
59    pub fn get_ref(&self) -> &R {
60        &self.inner
61    }
62
63    /// Returns a mutable reference to the underlying reader.
64    ///
65    /// # Examples
66    ///
67    /// ```
68    /// # use std::io;
69    /// use noodles_cram as cram;
70    /// let mut reader = cram::io::Reader::new(io::empty());
71    /// let _inner = reader.get_mut();
72    /// ```
73    pub fn get_mut(&mut self) -> &mut R {
74        &mut self.inner
75    }
76
77    /// Unwraps and returns the underlying reader.
78    ///
79    /// # Examples
80    ///
81    /// ```
82    /// # use std::io;
83    /// use noodles_cram as cram;
84    /// let reader = cram::io::Reader::new(io::empty());
85    /// let _inner = reader.into_inner();
86    /// ```
87    pub fn into_inner(self) -> R {
88        self.inner
89    }
90}
91
92impl<R> Reader<R>
93where
94    R: Read,
95{
96    /// Creates a CRAM reader.
97    ///
98    /// # Examples
99    ///
100    /// ```no_run
101    /// # use std::{fs::File, io};
102    /// use noodles_cram as cram;
103    /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
104    /// # Ok::<(), io::Error>(())
105    /// ```
106    pub fn new(inner: R) -> Self {
107        Builder::default().build_from_reader(inner)
108    }
109
110    pub(crate) fn reference_sequence_repository(&self) -> &fasta::Repository {
111        &self.reference_sequence_repository
112    }
113
114    /// Returns a CRAM header reader.
115    ///
116    /// # Examples
117    ///
118    /// ```no_run
119    /// # use std::{fs::File, io::Read};
120    /// use noodles_cram as cram;
121    ///
122    /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
123    ///
124    /// let mut header_reader = reader.header_reader();
125    /// header_reader.read_magic_number()?;
126    /// header_reader.read_format_version()?;
127    /// header_reader.read_file_id()?;
128    ///
129    /// let mut container_reader = header_reader.container_reader()?;
130    ///
131    /// let _raw_header = {
132    ///     let mut raw_sam_header_reader = container_reader.raw_sam_header_reader()?;
133    ///     let mut raw_header = String::new();
134    ///     raw_sam_header_reader.read_to_string(&mut raw_header)?;
135    ///     raw_sam_header_reader.discard_to_end()?;
136    ///     raw_header
137    /// };
138    ///
139    /// container_reader.discard_to_end()?;
140    /// Ok::<_, std::io::Error>(())
141    /// ```
142    pub fn header_reader(&mut self) -> header::Reader<&mut R> {
143        header::Reader::new(&mut self.inner)
144    }
145
146    /// Reads the CRAM file definition.
147    ///
148    /// The CRAM magic number is also checked.
149    ///
150    /// The position of the stream is expected to be at the start.
151    ///
152    /// # Examples
153    ///
154    /// ```no_run
155    /// # use std::{fs::File, io};
156    /// use noodles_cram as cram;
157    /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
158    /// let file_definition = reader.read_file_definition()?;
159    /// # Ok::<(), io::Error>(())
160    /// ```
161    pub fn read_file_definition(&mut self) -> io::Result<FileDefinition> {
162        header::read_file_definition(&mut self.inner)
163    }
164
165    /// Reads the SAM header.
166    ///
167    /// The position of the stream is expected to be at the CRAM header container, i.e., directly
168    /// after the file definition.
169    ///
170    /// # Examples
171    ///
172    /// ```no_run
173    /// # use std::{fs::File, io};
174    /// use noodles_cram as cram;
175    ///
176    /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
177    /// reader.read_file_definition()?;
178    ///
179    /// let header = reader.read_file_header()?;
180    /// # Ok::<(), io::Error>(())
181    /// ```
182    pub fn read_file_header(&mut self) -> io::Result<sam::Header> {
183        header::read_file_header(&mut self.inner)
184    }
185
186    /// Reads the SAM header.
187    ///
188    /// This verifies the CRAM magic number, discards the file definition, and reads and parses the
189    /// file header as a SAM header.
190    ///
191    /// The position of the stream is expected to be at the start.
192    ///
193    /// # Examples
194    ///
195    /// ```no_run
196    /// # use std::{fs::File, io};
197    /// use noodles_cram as cram;
198    /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
199    /// let header = reader.read_header()?;
200    /// # Ok::<(), io::Error>(())
201    /// ```
202    pub fn read_header(&mut self) -> io::Result<sam::Header> {
203        read_header(&mut self.inner)
204    }
205
206    /// Reads a container.
207    ///
208    /// This returns `None` if the container header is the EOF container header, which signals the
209    /// end of the stream.
210    ///
211    /// # Examples
212    ///
213    /// ```no_run
214    /// # use std::{fs::File, io};
215    /// use noodles_cram::{self as cram, io::reader::Container};
216    ///
217    /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
218    /// reader.read_header()?;
219    ///
220    /// let mut container = Container::default();
221    ///
222    /// while reader.read_container(&mut container)? != 0 {
223    ///     // ...
224    /// }
225    /// # Ok::<(), io::Error>(())
226    /// ```
227    pub fn read_container(&mut self, container: &mut Container) -> io::Result<usize> {
228        read_container(&mut self.inner, container)
229    }
230
231    /// Reads a container.
232    #[deprecated(since = "0.78.0", note = "Use `Reader::read_container` instead.")]
233    pub fn read_data_container(&mut self) -> io::Result<Option<Container>> {
234        let mut container = Container::default();
235
236        read_container(&mut self.inner, &mut container).map(|n| match n {
237            0 => None,
238            _ => Some(container),
239        })
240    }
241
242    /// Returns a iterator over records starting from the current stream position.
243    ///
244    /// The stream is expected to be at the start of a container.
245    ///
246    /// # Examples
247    ///
248    /// ```no_run
249    /// # use std::{fs::File, io};
250    /// use noodles_cram as cram;
251    /// use noodles_fasta as fasta;
252    ///
253    /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
254    /// let header = reader.read_header()?;
255    ///
256    /// for result in reader.records(&header) {
257    ///     let record = result?;
258    ///     // ...
259    /// }
260    /// # Ok::<_, io::Error>(())
261    /// ```
262    pub fn records<'r>(&'r mut self, header: &'r sam::Header) -> Records<'r, R> {
263        Records::new(self, header)
264    }
265}
266
267impl<R> Reader<R>
268where
269    R: Read + Seek,
270{
271    /// Seeks the underlying reader to the given position.
272    ///
273    /// Positions typically come from the associated CRAM index file.
274    ///
275    /// # Examples
276    ///
277    /// ```no_run
278    /// # use std::io::{self, SeekFrom};
279    /// use noodles_cram as cram;
280    /// let mut reader = cram::io::Reader::new(io::empty());
281    /// reader.seek(SeekFrom::Start(0))?;
282    /// # Ok::<(), io::Error>(())
283    /// ```
284    pub fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
285        self.inner.seek(pos)
286    }
287
288    /// Returns the current position of the underlying reader.
289    ///
290    /// # Examples
291    ///
292    /// ```
293    /// # use std::io;
294    /// use noodles_cram as cram;
295    /// let mut reader = cram::io::Reader::new(io::empty());
296    /// let position = reader.position()?;
297    /// assert_eq!(position, 0);
298    /// # Ok::<(), io::Error>(())
299    /// ```
300    pub fn position(&mut self) -> io::Result<u64> {
301        self.inner.stream_position()
302    }
303
304    /// Returns an iterator over records that intersects the given region.
305    ///
306    /// # Examples
307    ///
308    /// ```no_run
309    /// # use std::{fs::File, io};
310    /// use noodles_cram::{self as cram, crai};
311    /// use noodles_fasta as fasta;
312    ///
313    /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
314    ///
315    /// let header = reader.read_header()?;
316    /// let index = crai::fs::read("sample.cram.crai")?;
317    /// let region = "sq0:8-13".parse()?;
318    /// let query = reader.query(&header, &index, &region)?;
319    ///
320    /// for result in query {
321    ///     let record = result?;
322    ///     // ...
323    /// }
324    /// # Ok::<_, Box<dyn std::error::Error>>(())
325    /// ```
326    pub fn query<'a>(
327        &'a mut self,
328        header: &'a sam::Header,
329        index: &'a crai::Index,
330        region: &Region,
331    ) -> io::Result<Query<'a, R>> {
332        let reference_sequence_id = header
333            .reference_sequences()
334            .get_index_of(region.name())
335            .ok_or_else(|| {
336                io::Error::new(
337                    io::ErrorKind::InvalidInput,
338                    "invalid reference sequence name",
339                )
340            })?;
341
342        Ok(Query::new(
343            self,
344            header,
345            index,
346            reference_sequence_id,
347            region.interval(),
348        ))
349    }
350}
351
352impl<R> sam::alignment::io::Read<R> for Reader<R>
353where
354    R: Read,
355{
356    fn read_alignment_header(&mut self) -> io::Result<sam::Header> {
357        self.read_header()
358    }
359
360    fn alignment_records<'a>(
361        &'a mut self,
362        header: &'a sam::Header,
363    ) -> Box<dyn Iterator<Item = io::Result<Box<dyn sam::alignment::Record>>> + 'a> {
364        Box::new(
365            self.records(header).map(|result| {
366                result.map(|record| Box::new(record) as Box<dyn sam::alignment::Record>)
367            }),
368        )
369    }
370}
371
// Splits `src` into `(src[..mid], src[mid..])`, or returns `None` when `mid` is past the end of
// `src`.
//
// TODO: Use `slice::split_at_checked` when the MSRV is raised to or above Rust 1.80.0.
pub(crate) fn split_at_checked(src: &[u8], mid: usize) -> Option<(&[u8], &[u8])> {
    // Both range lookups fail exactly when `mid > src.len()`.
    let head = src.get(..mid)?;
    let tail = src.get(mid..)?;
    Some((head, tail))
}
380
// Splits the first `N` bytes off `src` as a fixed-size array reference, returning it with the
// remaining bytes, or returns `None` when `src` holds fewer than `N` bytes.
//
// TODO: Use `slice::split_first_chunk` when the MSRV is raised to or above Rust 1.77.0.
pub(crate) fn split_first_chunk<const N: usize>(src: &[u8]) -> Option<(&[u8; N], &[u8])> {
    // Both range lookups fail exactly when `src.len() < N`.
    let head = src.get(..N)?;
    let tail = src.get(N..)?;

    // `head` is exactly `N` bytes long here, so the conversion to `&[u8; N]` succeeds.
    let chunk = <&[u8; N]>::try_from(head).ok()?;

    Some((chunk, tail))
}