noodles_fasta/io/reader.rs
1//! FASTA reader and iterators.
2
3mod builder;
4mod definition;
5mod records;
6pub mod sequence;
7
8pub use self::{builder::Builder, records::Records};
9
10use std::io::{self, BufRead, Seek, SeekFrom};
11
12use noodles_core::{Position, Region};
13
14use self::definition::read_definition;
15use crate::{fai, Record};
16
/// The byte that introduces a definition line, i.e., the `>` in `>sq0`.
pub(crate) const DEFINITION_PREFIX: u8 = b'>';
18
/// A FASTA reader.
///
/// This wraps an underlying reader `R`. The reading methods are available when `R` implements
/// [`BufRead`]; region queries additionally require [`Seek`].
#[derive(Debug)]
pub struct Reader<R> {
    // The wrapped input stream.
    inner: R,
}
23
24impl<R> Reader<R>
25where
26 R: BufRead,
27{
28 /// Creates a FASTA reader.
29 ///
30 /// # Examples
31 ///
32 /// ```
33 /// use noodles_fasta as fasta;
34 /// let data = b">sq0\nACGT\n>sq1\nNNNN\nNNNN\nNN\n";
35 /// let mut reader = fasta::io::Reader::new(&data[..]);
36 /// ```
37 pub fn new(inner: R) -> Self {
38 Self { inner }
39 }
40
41 /// Returns a reference to the underlying reader.
42 ///
43 /// # Examples
44 ///
45 /// ```
46 /// use noodles_fasta as fasta;
47 /// let reader = fasta::io::Reader::new(&[][..]);
48 /// assert!(reader.get_ref().is_empty());
49 /// ```
50 pub fn get_ref(&self) -> &R {
51 &self.inner
52 }
53
54 /// Returns a mutable reference to the underlying reader.
55 ///
56 /// # Examples
57 ///
58 /// ```
59 /// use noodles_fasta as fasta;
60 /// let mut reader = fasta::io::Reader::new(&[][..]);
61 /// assert!(reader.get_mut().is_empty());
62 /// ```
63 pub fn get_mut(&mut self) -> &mut R {
64 &mut self.inner
65 }
66
67 /// Returns the underlying reader.
68 ///
69 /// # Examples
70 ///
71 /// ```
72 /// use noodles_fasta as fasta;
73 /// let reader = fasta::io::Reader::new(&[][..]);
74 /// assert!(reader.into_inner().is_empty());
75 /// ```
76 pub fn into_inner(self) -> R {
77 self.inner
78 }
79
80 /// Reads a raw definition line.
81 ///
82 /// The given buffer will not include the trailing newline. It can subsequently be parsed as a
83 /// [`crate::record::Definition`].
84 ///
85 /// The position of the stream is expected to be at the start or at the start of another
86 /// definition.
87 ///
88 /// If successful, this returns the number of bytes read from the stream. If the number of
89 /// bytes read is 0, the stream reached EOF.
90 ///
91 /// # Examples
92 ///
93 /// ```
94 /// # use std::io;
95 /// use noodles_fasta as fasta;
96 ///
97 /// let data = b">sq0\nACGT\n>sq1\nNNNN\nNNNN\nNN\n";
98 /// let mut reader = fasta::io::Reader::new(&data[..]);
99 ///
100 /// let mut buf = String::new();
101 /// reader.read_definition(&mut buf)?;
102 ///
103 /// assert_eq!(buf, ">sq0");
104 /// # Ok::<(), io::Error>(())
105 /// ```
106 pub fn read_definition(&mut self, buf: &mut String) -> io::Result<usize> {
107 read_definition(&mut self.inner, buf)
108 }
109
110 /// Reads a sequence.
111 ///
112 /// The given buffer consumes a sequence without newlines until another definition or EOF is
113 /// reached.
114 ///
115 /// The position of the stream is expected to be at the start of a sequence, which is directly
116 /// after a definition.
117 ///
118 /// If successful, this returns the number of bases read from the stream. If the number of
119 /// bases read is 0, the stream reached EOF (though this case is likely an error).
120 ///
121 /// # Examples
122 ///
123 /// ```
124 /// # use std::io;
125 /// use noodles_fasta as fasta;
126 ///
127 /// let data = b">sq0\nACGT\n>sq1\nNNNN\nNNNN\nNN\n";
128 /// let mut reader = fasta::io::Reader::new(&data[..]);
129 /// reader.read_definition(&mut String::new())?;
130 ///
131 /// let mut buf = Vec::new();
132 /// reader.read_sequence(&mut buf)?;
133 ///
134 /// assert_eq!(buf, b"ACGT");
135 /// # Ok::<(), io::Error>(())
136 /// ```
137 pub fn read_sequence(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
138 use self::sequence::read_sequence;
139 read_sequence(&mut self.inner, buf)
140 }
141
142 /// Returns a sequence reader.
143 ///
144 /// A [`sequence::Reader`] can be used for lower-level reading of the raw sequence.
145 ///
146 /// The position of the stream is expected to be at the start of a sequence to read a full
147 /// sequence or within a sequence to read a partial one.
148 ///
149 /// # Examples
150 ///
151 /// ```
152 /// # use std::io::{self, Read};
153 /// use noodles_fasta as fasta;
154 ///
155 /// let data = b">sq0\nACGT\n>sq1\nNNNN\nNNNN\nNN\n";
156 /// let mut reader = fasta::io::Reader::new(&data[..]);
157 /// reader.read_definition(&mut String::new())?;
158 ///
159 /// let mut sequence_reader = reader.sequence_reader();
160 /// let mut buf = vec![0; 2];
161 /// sequence_reader.read_exact(&mut buf)?;
162 ///
163 /// assert_eq!(buf, b"AC");
164 /// # Ok::<(), io::Error>(())
165 /// ```
166 pub fn sequence_reader(&mut self) -> sequence::Reader<'_, R> {
167 sequence::Reader::new(self.get_mut())
168 }
169
170 /// Returns an iterator over records starting from the current stream position.
171 ///
172 /// The position of the stream is expected to be at the start or at the start of another
173 /// definition.
174 ///
175 /// ```
176 /// # use std::io;
177 /// use noodles_fasta::{self as fasta, record::{Definition, Sequence}};
178 ///
179 /// let data = b">sq0\nACGT\n>sq1\nNNNN\nNNNN\nNN\n";
180 /// let mut reader = fasta::io::Reader::new(&data[..]);
181 ///
182 /// let mut records = reader.records();
183 ///
184 /// assert_eq!(records.next().transpose()?, Some(fasta::Record::new(
185 /// Definition::new("sq0", None),
186 /// Sequence::from(b"ACGT".to_vec()),
187 /// )));
188 ///
189 /// assert_eq!(records.next().transpose()?, Some(fasta::Record::new(
190 /// Definition::new("sq1", None),
191 /// Sequence::from(b"NNNNNNNNNN".to_vec()),
192 /// )));
193 ///
194 /// assert!(records.next().is_none());
195 /// # Ok::<(), io::Error>(())
196 /// ```
197 pub fn records(&mut self) -> Records<'_, R> {
198 Records::new(self)
199 }
200}
201
202impl<R> Reader<R>
203where
204 R: BufRead + Seek,
205{
206 /// Returns a record of the given region.
207 ///
208 /// # Examples
209 ///
210 /// ```
211 /// # use std::io::Cursor;
212 /// use noodles_core::Region;
213 /// use noodles_fasta::{self as fasta, fai, record::{Definition, Sequence}};
214 ///
215 /// let data = b">sq0\nNNNN\n>sq1\nACGT\n>sq2\nNNNN\n";
216 /// let index = fai::Index::from(vec![
217 /// fai::Record::new("sq0", 4, 5, 4, 5),
218 /// fai::Record::new("sq1", 4, 15, 4, 5),
219 /// fai::Record::new("sq2", 4, 25, 4, 5),
220 /// ]);
221 ///
222 /// let mut reader = fasta::io::Reader::new(Cursor::new(data));
223 ///
224 /// let region = Region::new("sq1", ..);
225 /// let record = reader.query(&index, ®ion)?;
226 /// assert_eq!(record, fasta::Record::new(
227 /// Definition::new("sq1", None),
228 /// Sequence::from(b"ACGT".to_vec()),
229 /// ));
230 ///
231 /// let region = "sq1:2-3".parse()?;
232 /// let record = reader.query(&index, ®ion)?;
233 /// assert_eq!(record, fasta::Record::new(
234 /// Definition::new("sq1:2-3", None),
235 /// Sequence::from(b"CG".to_vec()),
236 /// ));
237 /// # Ok::<(), Box<dyn std::error::Error>>(())
238 /// ```
239 pub fn query(&mut self, index: &fai::Index, region: &Region) -> io::Result<Record> {
240 use self::sequence::read_sequence_limit;
241 use crate::record::{Definition, Sequence};
242
243 let pos = index.query(region)?;
244 self.get_mut().seek(SeekFrom::Start(pos))?;
245
246 let definition = Definition::new(region.to_string(), None);
247
248 let interval = region.interval();
249 let start = usize::from(interval.start().unwrap_or(Position::MIN));
250 let end = usize::from(interval.end().unwrap_or(Position::MAX));
251 let len = end - start + 1;
252
253 let mut raw_sequence = Vec::new();
254 read_sequence_limit(&mut self.inner, len, &mut raw_sequence)?;
255
256 let sequence = Sequence::from(raw_sequence);
257
258 Ok(Record::new(definition, sequence))
259 }
260}
261
// Reads a single line, i.e., all bytes up to and including a line feed ('\n') or until EOF.
//
// The line terminator ('\n' or '\r\n') is removed from the end of the buffer. The returned
// count is the number of bytes consumed from the reader, which includes the terminator.
pub(crate) fn read_line<R>(reader: &mut R, buf: &mut String) -> io::Result<usize>
where
    R: BufRead,
{
    let n = reader.read_line(buf)?;

    // Nothing was read (EOF), so whatever the buffer already held is left as is.
    if n == 0 {
        return Ok(0);
    }

    // Length of the buffer once a trailing "\n" (and a "\r" directly before it) is removed.
    let trimmed_len = buf
        .strip_suffix('\n')
        .map(|line| line.strip_suffix('\r').unwrap_or(line).len());

    if let Some(len) = trimmed_len {
        buf.truncate(len);
    }

    Ok(n)
}
288
#[cfg(test)]
mod tests {
    use super::*;

    // A definition line is read without its trailing newline.
    #[test]
    fn test_read_definition() -> io::Result<()> {
        let src = b">sq0\nACGT\n";
        let mut reader = Reader::new(&src[..]);

        let mut definition = String::new();
        reader.read_definition(&mut definition)?;
        assert_eq!(definition, ">sq0");

        Ok(())
    }

    // Lines ending in "\n", "\r\n", or nothing at all (EOF) all yield the bare line.
    #[test]
    fn test_read_line() -> io::Result<()> {
        let cases: [(&[u8], &str); 3] = [
            (&b"noodles\n"[..], "noodles"),
            (&b"noodles\r\n"[..], "noodles"),
            (&b"noodles"[..], "noodles"),
        ];

        let mut buf = String::new();

        for &(input, expected) in &cases {
            let mut src = input;
            buf.clear();
            read_line(&mut src, &mut buf)?;
            assert_eq!(buf, expected);
        }

        Ok(())
    }
}