1use std::ffi;
45use std::path::Path;
46use std::ptr;
47use url::Url;
48
49use crate::errors::{Error, Result};
50use crate::htslib;
51use crate::utils::path_as_bytes;
52
53pub trait Read: Sized {
55 fn read(&mut self, record: &mut Vec<u8>) -> Result<bool>;
67
68 fn records(&mut self) -> Records<'_, Self>;
74
75 fn header(&self) -> &Vec<String>;
77}
78
79#[derive(Debug)]
87pub struct Reader {
88 header: Vec<String>,
90
91 hts_file: *mut htslib::htsFile,
93 hts_format: htslib::htsExactFormat,
95 tbx: *mut htslib::tbx_t,
97 buf: htslib::kstring_t,
99 itr: Option<*mut htslib::hts_itr_t>,
101
102 tid: i64,
104 start: i64,
106 end: i64,
108}
109
110unsafe impl Send for Reader {}
111
112const KS_SEP_LINE: i32 = 2;
114
115impl Reader {
116 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
122 Self::new(&path_as_bytes(path, true)?)
123 }
124
125 pub fn from_url(url: &Url) -> Result<Self> {
126 Self::new(url.as_str().as_bytes())
127 }
128
129 fn new(path: &[u8]) -> Result<Self> {
135 let path = ffi::CString::new(path).unwrap();
136 let c_str = ffi::CString::new("r").unwrap();
137 let hts_file = unsafe { htslib::hts_open(path.as_ptr(), c_str.as_ptr()) };
138 let hts_format: u32 = unsafe {
139 let file_format: *const hts_sys::htsFormat = htslib::hts_get_format(hts_file);
140 (*file_format).format
141 };
142
143 let tbx = unsafe { htslib::tbx_index_load(path.as_ptr()) };
144 if tbx.is_null() {
145 return Err(Error::TabixInvalidIndex);
146 }
147 let mut header = Vec::new();
148 let mut buf = htslib::kstring_t {
149 l: 0,
150 m: 0,
151 s: ptr::null_mut(),
152 };
153 unsafe {
154 while htslib::hts_getline(hts_file, KS_SEP_LINE, &mut buf) >= 0 {
155 if buf.l > 0 && i32::from(*buf.s) == (*tbx).conf.meta_char {
156 header.push(String::from(ffi::CStr::from_ptr(buf.s).to_str().unwrap()));
157 } else {
158 break;
159 }
160 }
161 }
162
163 Ok(Reader {
164 header,
165 hts_file,
166 hts_format,
167 tbx,
168 buf,
169 itr: None,
170 tid: -1,
171 start: -1,
172 end: -1,
173 })
174 }
175
176 pub fn tid(&self, name: &str) -> Result<u64> {
178 let name_cstr = ffi::CString::new(name.as_bytes()).unwrap();
179 let res = unsafe { htslib::tbx_name2id(self.tbx, name_cstr.as_ptr()) };
180 if res < 0 {
181 Err(Error::UnknownSequence {
182 sequence: name.to_owned(),
183 })
184 } else {
185 Ok(res as u64)
186 }
187 }
188
189 pub fn fetch(&mut self, tid: u64, start: u64, end: u64) -> Result<()> {
191 self.tid = tid as i64;
192 self.start = start as i64;
193 self.end = end as i64;
194
195 if let Some(itr) = self.itr {
196 unsafe {
197 htslib::hts_itr_destroy(itr);
198 }
199 }
200 let itr = unsafe {
201 htslib::hts_itr_query(
202 (*self.tbx).idx,
203 tid as i32,
204 start as i64,
205 end as i64,
206 Some(htslib::tbx_readrec),
207 )
208 };
209 if itr.is_null() {
210 self.itr = None;
211 Err(Error::Fetch)
212 } else {
213 self.itr = Some(itr);
214 Ok(())
215 }
216 }
217
218 pub fn seqnames(&self) -> Vec<String> {
220 let mut result = Vec::new();
221
222 let mut nseq: i32 = 0;
223 let seqs = unsafe { htslib::tbx_seqnames(self.tbx, &mut nseq) };
224 for i in 0..nseq {
225 unsafe {
226 result.push(String::from(
227 ffi::CStr::from_ptr(*seqs.offset(i as isize))
228 .to_str()
229 .unwrap(),
230 ));
231 }
232 }
233 unsafe {
234 libc::free(seqs as *mut libc::c_void);
235 };
236
237 result
238 }
239
240 pub fn set_threads(&mut self, n_threads: usize) -> Result<()> {
247 assert!(n_threads > 0, "n_threads must be > 0");
248
249 let r = unsafe { htslib::hts_set_threads(self.hts_file, n_threads as i32) };
250 if r != 0 {
251 Err(Error::SetThreads)
252 } else {
253 Ok(())
254 }
255 }
256}
257
258fn overlap(tid1: i64, begin1: i64, end1: i64, tid2: i64, begin2: i64, end2: i64) -> bool {
260 (tid1 == tid2) && (begin1 < end2) && (begin2 < end1)
261}
262
263impl Read for Reader {
264 fn read(&mut self, record: &mut Vec<u8>) -> Result<bool> {
265 match self.itr {
266 Some(itr) => {
267 loop {
268 let ret = unsafe {
270 htslib::hts_itr_next(
271 htslib::hts_get_bgzfp(self.hts_file),
272 itr,
273 &mut self.buf as *mut htslib::kstring_t as *mut libc::c_void,
275 self.tbx as *mut libc::c_void,
277 )
278 };
279 if ret == -1 {
281 return Ok(false);
282 } else if ret == -2 {
283 return Err(Error::TabixTruncatedRecord);
284 } else if ret < 0 {
285 panic!("Return value should not be <0 but was: {}", ret);
286 }
287 let (tid, start, end) =
290 unsafe { ((*itr).curr_tid, (*itr).curr_beg, (*itr).curr_end) };
291 if overlap(self.tid, self.start, self.end, tid as i64, start, end) {
293 *record =
294 unsafe { Vec::from(ffi::CStr::from_ptr(self.buf.s).to_str().unwrap()) };
295 return Ok(true);
296 }
297 }
298 }
299 _ => Err(Error::TabixNoIter),
300 }
301 }
302
303 fn records(&mut self) -> Records<'_, Self> {
304 Records { reader: self }
305 }
306
307 fn header(&self) -> &Vec<String> {
308 &self.header
309 }
310}
311
312impl Drop for Reader {
313 fn drop(&mut self) {
314 unsafe {
315 if self.itr.is_some() {
316 htslib::hts_itr_destroy(self.itr.unwrap());
317 }
318 htslib::tbx_destroy(self.tbx);
319 htslib::hts_close(self.hts_file);
320 }
321 }
322}
323
324#[derive(Debug)]
326pub struct Records<'a, R: Read> {
327 reader: &'a mut R,
328}
329
330impl<'a, R: Read> Iterator for Records<'a, R> {
331 type Item = Result<Vec<u8>>;
332
333 #[allow(clippy::read_zero_byte_vec)]
334 fn next(&mut self) -> Option<Result<Vec<u8>>> {
335 let mut record = Vec::new();
336 match self.reader.read(&mut record) {
337 Ok(false) => None,
338 Ok(true) => Some(Ok(record)),
339 Err(err) => Some(Err(err)),
340 }
341 }
342}
343
344#[cfg(test)]
345mod tests {
346 use super::*;
347
348 #[test]
349 fn bed_basic() {
350 let reader =
351 Reader::from_path("test/tabix_reader/test_bed3.bed.gz").expect("Error opening file.");
352
353 assert_eq!(
355 reader.seqnames(),
356 vec![String::from("chr1"), String::from("chr2")]
357 );
358
359 assert_eq!(reader.tid("chr1").unwrap(), 0);
361 assert_eq!(reader.tid("chr2").unwrap(), 1);
362 assert!(reader.tid("chr3").is_err());
363 }
364
365 #[test]
366 fn bed_fetch_from_chr1_read_api() {
367 let mut reader =
368 Reader::from_path("test/tabix_reader/test_bed3.bed.gz").expect("Error opening file.");
369
370 let chr1_id = reader.tid("chr1").unwrap();
371 assert!(reader.fetch(chr1_id, 1000, 1003).is_ok());
372
373 let mut record = Vec::new();
374 assert!(reader.read(&mut record).is_ok());
375 assert_eq!(record, Vec::from("chr1\t1001\t1002"));
376 assert_eq!(reader.read(&mut record), Ok(false)); }
378
379 #[test]
380 fn bed_fetch_from_chr1_iterator_api() {
381 let mut reader =
382 Reader::from_path("test/tabix_reader/test_bed3.bed.gz").expect("Error opening file.");
383
384 let chr1_id = reader.tid("chr1").unwrap();
385 assert!(reader.fetch(chr1_id, 1000, 1003).is_ok());
386
387 let records: Vec<Vec<u8>> = reader.records().map(|r| r.unwrap()).collect();
388 assert_eq!(records, vec![Vec::from("chr1\t1001\t1002")]);
389 }
390
391 #[test]
392 fn test_fails_on_bam() {
393 let reader = Reader::from_path("test/test.bam");
394 assert!(reader.is_err());
395 }
396
397 #[test]
398 fn test_fails_on_non_existiant() {
399 let reader = Reader::from_path("test/no_such_file");
400 assert!(reader.is_err());
401 }
402
403 #[test]
404 fn test_fails_on_vcf() {
405 let reader = Reader::from_path("test/test_left.vcf");
406 assert!(reader.is_err());
407 }
408
409 #[test]
410 fn test_text_header_regions() {
411 Reader::from_path("test/tabix_reader/genomic_regions_header.txt.gz")
413 .expect("Error opening file.");
414 }
415
416 #[test]
417 fn test_text_header_positions() {
418 Reader::from_path("test/tabix_reader/genomic_positions_header.txt.gz")
421 .expect("Error opening file.");
422 }
423
424 #[test]
425 fn test_text_bad_header() {
426 Reader::from_path("test/tabix_reader/bad_header.txt.gz")
428 .expect_err("Invalid index file should fail.");
429 }
430}