1use std::ffi;
11use std::path::Path;
12use url::Url;
13
14use crate::htslib;
15
16use crate::errors::{Error, Result};
17use crate::utils::path_as_bytes;
18
19#[derive(Debug)]
21pub struct Reader {
22 inner: *mut htslib::faidx_t,
23}
24
25pub fn build(
38 path: impl Into<std::path::PathBuf>,
39) -> Result<(), std::boxed::Box<dyn std::error::Error>> {
40 let path = path.into();
41 let os_path = std::ffi::CString::new(path.display().to_string())?;
42 let rc = unsafe { htslib::fai_build(os_path.as_ptr()) };
43 if rc < 0 {
44 Err(Error::FaidxBuildFailed { path })?
45 } else {
46 Ok(())
47 }
48}
49
50impl Reader {
51 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
57 Self::new(&path_as_bytes(path, true)?)
58 }
59
60 pub fn from_url(url: &Url) -> Result<Self, Error> {
66 Self::new(url.as_str().as_bytes())
67 }
68
69 fn new(path: &[u8]) -> Result<Self, Error> {
76 let cpath = ffi::CString::new(path).unwrap();
77 let inner = unsafe { htslib::fai_load(cpath.as_ptr()) };
78 Ok(Self { inner })
79 }
80
81 pub fn fetch_seq<N: AsRef<str>>(&self, name: N, begin: usize, end: usize) -> Result<Vec<u8>> {
89 if begin > std::i64::MAX as usize {
90 return Err(Error::FaidxPositionTooLarge);
91 }
92 if end > std::i64::MAX as usize {
93 return Err(Error::FaidxPositionTooLarge);
94 }
95 let cname = ffi::CString::new(name.as_ref().as_bytes()).unwrap();
96 let mut len_out: htslib::hts_pos_t = 0;
97 let ptr = unsafe {
98 htslib::faidx_fetch_seq64(
99 self.inner, cname.as_ptr(), begin as htslib::hts_pos_t, end as htslib::hts_pos_t, &mut len_out, )
105 };
106 let vec =
107 unsafe { Vec::from_raw_parts(ptr as *mut u8, len_out as usize, len_out as usize) };
108 Ok(vec)
109 }
110
111 pub fn fetch_seq_string<N: AsRef<str>>(
119 &self,
120 name: N,
121 begin: usize,
122 end: usize,
123 ) -> Result<String> {
124 let bytes = self.fetch_seq(name, begin, end)?;
125 Ok(std::str::from_utf8(&bytes).unwrap().to_owned())
126 }
127
128 pub fn n_seqs(&self) -> u64 {
130 let n = unsafe { htslib::faidx_nseq(self.inner) };
131 n as u64
132 }
133
134 pub fn seq_name(&self, i: i32) -> Result<String> {
140 let cname = unsafe {
141 let ptr = htslib::faidx_iseq(self.inner, i);
142 ffi::CStr::from_ptr(ptr)
143 };
144
145 let out = match cname.to_str() {
146 Ok(s) => s.to_string(),
147 Err(_) => {
148 return Err(Error::FaidxBadSeqName);
149 }
150 };
151
152 Ok(out)
153 }
154
155 pub fn fetch_seq_len<N: AsRef<str>>(&self, name: N) -> u64 {
161 let cname = ffi::CString::new(name.as_ref().as_bytes()).unwrap();
162 let seq_len = unsafe { htslib::faidx_seq_len(self.inner, cname.as_ptr()) };
163 seq_len as u64
164 }
165
166 pub fn seq_names(&self) -> Result<Vec<String>> {
181 let num_seq = self.n_seqs();
182 let mut ret = Vec::with_capacity(num_seq as usize);
183 for seq_id in 0..num_seq {
184 ret.push(self.seq_name(seq_id as i32)?);
185 }
186 Ok(ret)
187 }
188}
189
190impl Drop for Reader {
191 fn drop(&mut self) {
192 unsafe {
193 htslib::fai_destroy(self.inner);
194 }
195 }
196}
197
198#[cfg(test)]
199mod tests {
200 use super::*;
201
202 fn open_reader() -> Reader {
203 Reader::from_path(format!("{}/test/test_cram.fa", env!("CARGO_MANIFEST_DIR")))
204 .ok()
205 .unwrap()
206 }
207 #[test]
208 fn faidx_open() {
209 open_reader();
210 }
211
212 #[test]
213 fn faidx_read_chr_first_base() {
214 let r = open_reader();
215
216 let bseq = r.fetch_seq("chr1", 0, 0).unwrap();
217 assert_eq!(bseq.len(), 1);
218 assert_eq!(bseq, b"G");
219
220 let seq = r.fetch_seq_string("chr1", 0, 0).unwrap();
221 assert_eq!(seq.len(), 1);
222 assert_eq!(seq, "G");
223 }
224
225 #[test]
226 fn faidx_read_chr_start() {
227 let r = open_reader();
228
229 let bseq = r.fetch_seq("chr1", 0, 9).unwrap();
231 assert_eq!(bseq.len(), 10);
232 assert_eq!(bseq, b"GGGCACAGCC");
233
234 let seq = r.fetch_seq_string("chr1", 0, 9).unwrap();
235 assert_eq!(seq.len(), 10);
236 assert_eq!(seq, "GGGCACAGCC");
237 }
239
240 #[test]
241 fn faidx_read_chr_between() {
242 let r = open_reader();
243
244 let bseq = r.fetch_seq("chr1", 4, 14).unwrap();
245 assert_eq!(bseq.len(), 11);
246 assert_eq!(bseq, b"ACAGCCTCACC");
247
248 let seq = r.fetch_seq_string("chr1", 4, 14).unwrap();
249 assert_eq!(seq.len(), 11);
250 assert_eq!(seq, "ACAGCCTCACC");
251 }
252
253 #[test]
254 fn faidx_read_chr_end() {
255 let r = open_reader();
256
257 let bseq = r.fetch_seq("chr1", 110, 120).unwrap();
258 assert_eq!(bseq.len(), 10);
259 assert_eq!(bseq, b"CCCCTCCGTG");
260
261 let seq = r.fetch_seq_string("chr1", 110, 120).unwrap();
262 assert_eq!(seq.len(), 10);
263 assert_eq!(seq, "CCCCTCCGTG");
264 }
265
266 #[test]
267 fn faidx_read_twice_string() {
268 let r = open_reader();
269 let seq = r.fetch_seq_string("chr1", 110, 120).unwrap();
270 assert_eq!(seq.len(), 10);
271 assert_eq!(seq, "CCCCTCCGTG");
272
273 let seq = r.fetch_seq_string("chr1", 5, 9).unwrap();
274 assert_eq!(seq.len(), 5);
275 assert_eq!(seq, "CAGCC");
276 }
277
278 #[test]
279 fn faidx_read_twice_bytes() {
280 let r = open_reader();
281 let seq = r.fetch_seq("chr1", 110, 120).unwrap();
282 assert_eq!(seq.len(), 10);
283 assert_eq!(seq, b"CCCCTCCGTG");
284
285 let seq = r.fetch_seq("chr1", 5, 9).unwrap();
286 assert_eq!(seq.len(), 5);
287 assert_eq!(seq, b"CAGCC");
288 }
289
290 #[test]
291 fn faidx_position_too_large() {
292 let r = open_reader();
293 let position_too_large = i64::MAX as usize;
294 let res = r.fetch_seq("chr1", position_too_large, position_too_large + 1);
295 assert_eq!(res, Err(Error::FaidxPositionTooLarge));
296 }
297
298 #[test]
299 fn faidx_n_seqs() {
300 let r = open_reader();
301 assert_eq!(r.n_seqs(), 3);
302 }
303
304 #[test]
305 fn faidx_seq_name() {
306 let r = open_reader();
307 let n = r.seq_name(1).unwrap();
308 assert_eq!(n, "chr2");
309 }
310
311 #[test]
312 fn faidx_get_seq_len() {
313 let r = open_reader();
314 let chr1_len = r.fetch_seq_len("chr1");
315 let chr2_len = r.fetch_seq_len("chr2");
316 assert_eq!(chr1_len, 120u64);
317 assert_eq!(chr2_len, 120u64);
318 }
319
320 #[test]
321 fn open_many_readers() {
322 for _ in 0..500_000 {
323 let reader = open_reader();
324 drop(reader);
325 }
326 }
327}