1use std::ffi;
11use std::path::Path;
12use url::Url;
13
14use crate::htslib;
15use crate::tpool::ThreadPool;
16
17use crate::errors::{Error, Result};
18
19fn path_as_bytes<'a, P: 'a + AsRef<Path>>(path: P, must_exist: bool) -> Result<Vec<u8>> {
20 if path.as_ref().exists() || !must_exist {
21 Ok(path
22 .as_ref()
23 .to_str()
24 .ok_or(Error::NonUnicodePath)?
25 .as_bytes()
26 .to_owned())
27 } else {
28 Err(Error::FileNotFound {
29 path: path.as_ref().to_owned(),
30 })
31 }
32}
33
34pub fn is_bgzip<P: AsRef<Path>>(path: P) -> Result<bool, Error> {
44 let byte_path = path_as_bytes(path, true)?;
45 let cpath = ffi::CString::new(byte_path).unwrap();
46 let is_bgzf = unsafe { htslib::bgzf_is_bgzf(cpath.as_ptr()) == 1 };
47 Ok(is_bgzf)
48}
49
50#[derive(Debug)]
52pub struct Reader {
53 inner: *mut htslib::BGZF,
54}
55
56impl Reader {
57 pub fn from_stdin() -> Result<Self, Error> {
59 Self::new(b"-")
60 }
61
62 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
68 Self::new(&path_as_bytes(path, true)?)
69 }
70
71 pub fn from_url(url: &Url) -> Result<Self, Error> {
77 Self::new(url.as_str().as_bytes())
78 }
79
80 fn new(path: &[u8]) -> Result<Self, Error> {
87 let mode = ffi::CString::new("r").unwrap();
88 let cpath = ffi::CString::new(path).unwrap();
89 let inner = unsafe { htslib::bgzf_open(cpath.as_ptr(), mode.as_ptr()) };
90 if inner != std::ptr::null_mut() {
91 Ok(Self { inner })
92 } else {
93 Err(Error::FileOpen {
94 path: String::from_utf8(path.to_vec()).unwrap(),
95 })
96 }
97 }
98
99 pub fn set_thread_pool(&mut self, tpool: &ThreadPool) -> Result<()> {
105 let b = tpool.handle.borrow_mut();
106 let r = unsafe {
107 htslib::bgzf_thread_pool(self.inner, b.inner.pool as *mut _, 0) };
109
110 if r != 0 {
111 Err(Error::ThreadPool)
112 } else {
113 Ok(())
114 }
115 }
116}
117
118impl std::io::Read for Reader {
119 fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
120 let nbytes = unsafe {
121 htslib::bgzf_read(self.inner, buf.as_mut_ptr() as *mut libc::c_void, buf.len())
122 };
123 if nbytes < 0 {
124 Err(std::io::Error::new(
125 std::io::ErrorKind::Other,
126 "Can not read",
127 ))
128 } else {
129 Ok(nbytes as usize)
130 }
131 }
132}
133
134#[derive(Debug, Clone, Copy)]
145pub enum CompressionLevel {
146 Default,
147 NoCompression,
148 Uncompressed,
149 Fastest,
150 Maximum,
151 Level(i8),
152}
153impl CompressionLevel {
154 fn convert(self) -> Result<i8> {
156 match self {
157 CompressionLevel::NoCompression => Ok(-2),
158 CompressionLevel::Default => Ok(-1),
159 CompressionLevel::Uncompressed => Ok(0),
160 CompressionLevel::Fastest => Ok(1),
161 CompressionLevel::Maximum => Ok(9),
162 CompressionLevel::Level(i @ -2..=9) => Ok(i),
163 CompressionLevel::Level(i) => Err(Error::BgzfInvalidCompressionLevel { level: i }),
164 }
165 }
166}
167
168#[derive(Debug)]
170pub struct Writer {
171 inner: *mut htslib::BGZF,
172 tpool: Option<ThreadPool>,
173}
174
175impl Writer {
176 pub fn from_stdout() -> Result<Self, Error> {
178 Self::from_stdout_with_compression(CompressionLevel::Default)
179 }
180
181 pub fn from_stdout_with_compression(level: CompressionLevel) -> Result<Self, Error> {
187 Self::new(b"-", level)
188 }
189
190 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
196 Self::from_path_with_level(path, CompressionLevel::Default)
197 }
198
199 pub fn from_path_with_level<P: AsRef<Path>>(
205 path: P,
206 level: CompressionLevel,
207 ) -> Result<Self, Error> {
208 Self::new(&path_as_bytes(path, false)?, level)
209 }
210
211 fn new(path: &[u8], level: CompressionLevel) -> Result<Self, Error> {
217 let mode = Self::get_open_mode(level)?;
218 let cpath = ffi::CString::new(path).unwrap();
219 let inner = unsafe { htslib::bgzf_open(cpath.as_ptr(), mode.as_ptr()) };
220 if inner != std::ptr::null_mut() {
221 Ok(Self { inner, tpool: None })
222 } else {
223 Err(Error::FileOpen {
224 path: String::from_utf8(path.to_vec()).unwrap(),
225 })
226 }
227 }
228
229 fn get_open_mode(level: CompressionLevel) -> Result<ffi::CString, Error> {
235 let write_string = match level.convert() {
236 Ok(-2) => "wu".to_string(),
237 Ok(-1) => "w".to_string(),
238 Ok(n @ 0..=9) => format!("w{}", n),
239 Err(e) => return Err(e),
240 Ok(i) => return Err(Error::BgzfInvalidCompressionLevel { level: i }),
242 };
243 return Ok(ffi::CString::new(write_string).unwrap());
244 }
245
246 pub fn set_thread_pool(&mut self, tpool: &ThreadPool) -> Result<()> {
252 self.tpool = Some(tpool.clone());
253 let b = tpool.handle.borrow_mut();
254 let r = unsafe {
255 htslib::bgzf_thread_pool(self.inner, b.inner.pool as *mut _, 0) };
257
258 if r != 0 {
259 Err(Error::ThreadPool)
260 } else {
261 Ok(())
262 }
263 }
264}
265
266impl std::io::Write for Writer {
267 fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
268 let nbytes =
269 unsafe { htslib::bgzf_write(self.inner, buf.as_ptr() as *mut libc::c_void, buf.len()) };
270 if nbytes < 0 {
271 Err(std::io::Error::new(
272 std::io::ErrorKind::Other,
273 "Can not write",
274 ))
275 } else {
276 Ok(nbytes as usize)
277 }
278 }
279
280 fn flush(&mut self) -> std::io::Result<()> {
281 let exit_code: i32 = unsafe { htslib::bgzf_flush(self.inner) };
282 if exit_code == 0 {
283 Ok(())
284 } else {
285 Err(std::io::Error::new(
286 std::io::ErrorKind::Other,
287 "Can not flush",
288 ))
289 }
290 }
291}
292
293impl std::ops::Drop for Writer {
294 fn drop(&mut self) {
295 unsafe {
296 htslib::bgzf_close(self.inner);
297 }
298 }
299}
300
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Read;
    use std::io::Write;

    // Fixture files; all three hold the same VCF text in different encodings.
    const FN_PLAIN: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/test/bgzip/plain.vcf");
    const FN_GZIP: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/test/bgzip/gzip.vcf.gz");
    const FN_BGZIP: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/test/bgzip/bgzip.vcf.gz");

    // Expected text of every fixture after decoding.
    const CONTENT: &str = include_str!("../../test/bgzip/plain.vcf");

    /// Creates a fresh temporary directory for one test.
    fn scratch_dir() -> tempfile::TempDir {
        tempfile::Builder::new()
            .prefix("rust-htslib")
            .tempdir()
            .expect("Cannot create temp dir")
    }

    /// Reads the whole file at `path` through a `Reader` into a `String`.
    fn read_back(path: &std::path::Path) -> String {
        let mut text = String::new();
        Reader::from_path(path)
            .unwrap()
            .read_to_string(&mut text)
            .unwrap();
        text
    }

    #[test]
    fn test_is_bgzip_plain() {
        let plain_is_bgzf = is_bgzip(FN_PLAIN).unwrap();
        assert!(!plain_is_bgzf, "Plain file not detected as BGZIP");

        let gzip_is_bgzf = is_bgzip(FN_GZIP).unwrap();
        assert!(!gzip_is_bgzf, "Zip file not detected as BGZIP");

        let bgzip_is_bgzf = is_bgzip(FN_BGZIP).unwrap();
        assert!(bgzip_is_bgzf, "Bgzip file detected as BGZIP");
    }

    #[test]
    fn test_open_plain() {
        let opened = Reader::from_path(FN_PLAIN);
        assert!(opened.is_ok(), "Open plain file with Bgzip reader");
        let mut reader = opened.unwrap();

        let mut text = String::new();
        let outcome = reader.read_to_string(&mut text);
        assert!(outcome.is_ok(), "Reading plain file into buffer is ok");
        assert_eq!(
            outcome.unwrap(),
            190,
            "Reading plain file into buffer is correct size"
        );
        assert_eq!(text, CONTENT, "Reading plain file with correct content");
    }

    #[test]
    fn test_open_gzip() {
        let opened = Reader::from_path(FN_GZIP);
        assert!(opened.is_ok(), "Open gzip file with Bgzip reader");
        let mut reader = opened.unwrap();

        let mut text = String::new();
        let outcome = reader.read_to_string(&mut text);
        assert!(outcome.is_ok(), "Reading gzip file into buffer is ok");
        assert_eq!(
            outcome.unwrap(),
            190,
            "Reading gzip file into buffer is correct size"
        );
        assert_eq!(text, CONTENT, "Reading gzip file with correct content");
    }

    #[test]
    fn test_open_bgzip() {
        let opened = Reader::from_path(FN_BGZIP);
        assert!(opened.is_ok(), "Open bgzip file with Bgzip reader");
        let mut reader = opened.unwrap();

        let mut text = String::new();
        let outcome = reader.read_to_string(&mut text);
        assert!(outcome.is_ok(), "Reading bgzip file into buffer is ok");
        assert_eq!(
            outcome.unwrap(),
            190,
            "Reading bgzip file into buffer is correct size"
        );
        assert_eq!(text, CONTENT, "Reading bgzip file with correct content");
    }

    #[test]
    fn test_set_threadpool() {
        let opened = Reader::from_path(FN_BGZIP);
        assert!(opened.is_ok(), "Open bgzip file with Bgzip reader");
        let mut reader = opened.unwrap();

        let pool_outcome = ThreadPool::new(5);
        assert!(pool_outcome.is_ok(), "Creating thread pool");
        let pool = pool_outcome.unwrap();

        let attach_outcome = reader.set_thread_pool(&pool);
        assert_eq!(attach_outcome, Ok(()), "Setting thread pool okay");

        let mut text = String::new();
        let outcome = reader.read_to_string(&mut text);
        assert!(
            outcome.is_ok(),
            "Reading bgzip file into buffer is ok - using a threadpool"
        );
        assert_eq!(
            outcome.unwrap(),
            190,
            "Reading bgzip file into buffer is correct size using a threadpool"
        );
        assert_eq!(
            text, CONTENT,
            "Reading bgzip file with correct content using a threadpool"
        );
    }

    #[test]
    fn test_write_plain() {
        let tmp = scratch_dir();
        let out_path = tmp.path().join("test.vcf");
        println!("{:?}", out_path);

        // The inner scope drops the writer, which closes the file before it is inspected.
        {
            let created = Writer::from_path_with_level(&out_path, CompressionLevel::NoCompression);
            if let Some(e) = created.as_ref().err() {
                println!("w_result is {}", e);
            }
            assert!(created.is_ok(), "Create plain file with Bgzip writer");
            assert!(out_path.exists(), "Plain file is created with Bgzip writer");

            let mut writer = created.unwrap();
            let written = writer.write_all(CONTENT.as_bytes());
            assert!(written.is_ok(), "Plain file can write with Bgzip writer");
        }

        assert!(
            !is_bgzip(&out_path).unwrap(),
            "NoCompression file should not be detected as BGZIP"
        );
        let on_disk = std::fs::read_to_string(&out_path).unwrap();
        assert_eq!(on_disk, CONTENT, "Writing bgzip file with no compression");

        tmp.close().expect("Failed to delete temp dir");
    }

    #[test]
    fn test_write_default() {
        let tmp = scratch_dir();
        let out_path = tmp.path().join("test.vcf.bgzf");
        println!("{:?}", out_path);

        // The inner scope drops the writer, which closes the file before it is read back.
        {
            let created = Writer::from_path(&out_path);
            if let Some(e) = created.as_ref().err() {
                println!("w_result is {}", e);
            }
            assert!(created.is_ok(), "Create bgzip file with Bgzip writer");
            assert!(
                out_path.as_path().exists(),
                "Bgzip file is created with Bgzip writer"
            );

            let mut writer = created.unwrap();
            let written = writer.write_all(CONTENT.as_bytes());
            assert!(written.is_ok(), "Bgzip file can write with Bgzip writer");
        }

        let round_trip = read_back(&out_path);
        assert_eq!(
            round_trip, CONTENT,
            "Writing bgzip file with default compression"
        );

        assert!(
            is_bgzip(&out_path).unwrap(),
            "Default BGZIP file detected as BGZIP"
        );
        tmp.close().expect("Failed to delete temp dir");
    }

    #[test]
    fn test_write_compression_levels() {
        let tmp = scratch_dir();
        let out_path = tmp.path().join("test.vcf.bgzf");

        // Named presets first, followed by every explicit level from -1 to 9.
        let named = vec![
            CompressionLevel::Fastest,
            CompressionLevel::Maximum,
            CompressionLevel::Uncompressed,
        ];
        let explicit = (-1..=9_i8).map(CompressionLevel::Level);

        for level in named.into_iter().chain(explicit) {
            // The inner scope drops the writer so the file is closed before it is read back.
            {
                let created = Writer::from_path_with_level(&out_path, level);
                if let Some(e) = created.as_ref().err() {
                    println!("w_result is {}", e);
                }
                assert!(created.is_ok(), "Create bgzip file with Bgzip writer");
                assert!(
                    out_path.as_path().exists(),
                    "Bgzip file is created with Bgzip writer"
                );

                let mut writer = created.unwrap();
                let written = writer.write_all(CONTENT.as_bytes());
                assert!(written.is_ok(), "Bgzip file can write with Bgzip writer");
            }

            let round_trip = read_back(&out_path);
            assert_eq!(
                round_trip, CONTENT,
                "Writing bgzip file with {:?} compression",
                level
            );

            assert!(
                is_bgzip(&out_path).unwrap(),
                "Writing BGZIP file with {:?} compression detected as BGZIP",
                level
            );
        }
        tmp.close().expect("Failed to delete temp dir");
    }

    #[test]
    fn test_write_with_threadpool() {
        let tmp = scratch_dir();
        let out_path = tmp.path().join("test.vcf.bgzf");

        let content = CONTENT.as_bytes();
        println!("{:?}", out_path);

        // The inner scope drops the pool and the writer before the file is read back.
        {
            let created = Writer::from_path(&out_path);
            if let Some(e) = created.as_ref().err() {
                println!("w_result is {}", e);
            }
            assert!(created.is_ok(), "Create bgzip file with Bgzip threadpool");
            assert!(
                out_path.as_path().exists(),
                "Bgzip file is created with Bgzip threadpool"
            );

            let mut writer = created.unwrap();
            let pool_outcome = ThreadPool::new(5);
            assert!(pool_outcome.is_ok(), "Creating thread pool");
            let pool = pool_outcome.unwrap();

            let attach_outcome = writer.set_thread_pool(&pool);
            assert!(attach_outcome.is_ok(), "Setting thread pool");

            let written = writer.write_all(content);
            assert!(
                written.is_ok(),
                "Bgzip file can write with Bgzip threadpool"
            );
        }

        let round_trip = read_back(&out_path);
        assert_eq!(round_trip, CONTENT, "Writing bgzip file with threadpool");

        assert!(
            is_bgzip(&out_path).unwrap(),
            "Threadpool BGZIP file detected as BGZIP"
        );

        tmp.close().expect("Failed to delete temp dir");
    }
}