use std::ffi;
use std::path::Path;
use url::Url;
use crate::htslib;
use crate::tpool::ThreadPool;
use crate::errors::{Error, Result};
fn path_as_bytes<'a, P: 'a + AsRef<Path>>(path: P, must_exist: bool) -> Result<Vec<u8>> {
if path.as_ref().exists() || !must_exist {
Ok(path
.as_ref()
.to_str()
.ok_or(Error::NonUnicodePath)?
.as_bytes()
.to_owned())
} else {
Err(Error::FileNotFound {
path: path.as_ref().to_owned(),
})
}
}
/// Asks htslib whether the file at `path` is in BGZF format.
///
/// Returns `Ok(true)` only when `bgzf_is_bgzf` reports `1`; any other value
/// is reported as `Ok(false)`.
///
/// # Errors
///
/// Fails with the errors of `path_as_bytes` (the file must exist and the path
/// must be valid Unicode).
pub fn is_bgzip<P: AsRef<Path>>(path: P) -> Result<bool, Error> {
    let cpath = ffi::CString::new(path_as_bytes(path, true)?).unwrap();
    // SAFETY: `cpath` is a NUL-terminated string that outlives the call.
    let verdict = unsafe { htslib::bgzf_is_bgzf(cpath.as_ptr()) };
    Ok(verdict == 1)
}
#[derive(Debug)]
pub struct Reader {
inner: *mut htslib::BGZF,
}
impl Reader {
pub fn from_stdin() -> Result<Self, Error> {
Self::new(b"-")
}
pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
Self::new(&path_as_bytes(path, true)?)
}
pub fn from_url(url: &Url) -> Result<Self, Error> {
Self::new(url.as_str().as_bytes())
}
fn new(path: &[u8]) -> Result<Self, Error> {
let mode = ffi::CString::new("r").unwrap();
let cpath = ffi::CString::new(path).unwrap();
let inner = unsafe { htslib::bgzf_open(cpath.as_ptr(), mode.as_ptr()) };
Ok(Self { inner })
}
pub fn set_thread_pool(&mut self, tpool: &ThreadPool) -> Result<()> {
let b = tpool.handle.borrow_mut();
let r = unsafe {
htslib::bgzf_thread_pool(self.inner, b.inner.pool as *mut _, 0) };
if r != 0 {
Err(Error::ThreadPool)
} else {
Ok(())
}
}
}
impl std::io::Read for Reader {
    /// Fills `buf` through `bgzf_read` and returns the number of bytes read.
    ///
    /// A negative count from htslib is turned into an `ErrorKind::Other`
    /// I/O error.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let dest = buf.as_mut_ptr() as *mut libc::c_void;
        let capacity = buf.len();
        // SAFETY: `dest` points at `capacity` writable bytes owned by `buf`.
        let count = unsafe { htslib::bgzf_read(self.inner, dest, capacity) };

        if count >= 0 {
            return Ok(count as usize);
        }
        Err(std::io::Error::new(
            std::io::ErrorKind::Other,
            "Can not read",
        ))
    }
}
#[derive(Debug, Clone, Copy)]
pub enum CompressionLevel {
Default,
NoCompression,
Uncompressed,
Fastest,
Maximum,
Level(i8),
}
impl CompressionLevel {
fn convert(self) -> Result<i8> {
match self {
CompressionLevel::NoCompression => Ok(-2),
CompressionLevel::Default => Ok(-1),
CompressionLevel::Uncompressed => Ok(0),
CompressionLevel::Fastest => Ok(1),
CompressionLevel::Maximum => Ok(9),
CompressionLevel::Level(i @ -2..=9) => Ok(i),
CompressionLevel::Level(i) => Err(Error::BgzfInvalidCompressionLevel { level: i }),
}
}
}
#[derive(Debug)]
pub struct Writer {
inner: *mut htslib::BGZF,
tpool: Option<ThreadPool>,
}
impl Writer {
pub fn from_stdout() -> Result<Self, Error> {
Self::from_stdout_with_compression(CompressionLevel::Default)
}
pub fn from_stdout_with_compression(level: CompressionLevel) -> Result<Self, Error> {
Self::new(b"-", level)
}
pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
Self::from_path_with_level(path, CompressionLevel::Default)
}
pub fn from_path_with_level<P: AsRef<Path>>(
path: P,
level: CompressionLevel,
) -> Result<Self, Error> {
Self::new(&path_as_bytes(path, false)?, level)
}
fn new(path: &[u8], level: CompressionLevel) -> Result<Self, Error> {
let mode = Self::get_open_mode(level)?;
let cpath = ffi::CString::new(path).unwrap();
let inner = unsafe { htslib::bgzf_open(cpath.as_ptr(), mode.as_ptr()) };
Ok(Self { inner, tpool: None })
}
fn get_open_mode(level: CompressionLevel) -> Result<ffi::CString, Error> {
let write_string = match level.convert() {
Ok(-2) => "wu".to_string(),
Ok(-1) => "w".to_string(),
Ok(n @ 0..=9) => format!("w{}", n),
Err(e) => return Err(e),
Ok(i) => return Err(Error::BgzfInvalidCompressionLevel { level: i }),
};
return Ok(ffi::CString::new(write_string).unwrap());
}
pub fn set_thread_pool(&mut self, tpool: &ThreadPool) -> Result<()> {
self.tpool = Some(tpool.clone());
let b = tpool.handle.borrow_mut();
let r = unsafe {
htslib::bgzf_thread_pool(self.inner, b.inner.pool as *mut _, 0) };
if r != 0 {
Err(Error::ThreadPool)
} else {
Ok(())
}
}
}
impl std::io::Write for Writer {
    /// Hands `buf` to `bgzf_write` and returns the number of bytes accepted.
    ///
    /// A negative count from htslib becomes an `ErrorKind::Other` I/O error.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let src = buf.as_ptr() as *mut libc::c_void;
        // SAFETY: `src` points at `buf.len()` readable bytes owned by `buf`.
        let written = unsafe { htslib::bgzf_write(self.inner, src, buf.len()) };

        match written {
            n if n < 0 => Err(std::io::Error::new(
                std::io::ErrorKind::Other,
                "Can not write",
            )),
            n => Ok(n as usize),
        }
    }

    /// Calls `bgzf_flush`; any non-zero status becomes an `ErrorKind::Other`
    /// I/O error.
    fn flush(&mut self) -> std::io::Result<()> {
        let exit_code: i32 = unsafe { htslib::bgzf_flush(self.inner) };
        if exit_code != 0 {
            return Err(std::io::Error::new(
                std::io::ErrorKind::Other,
                "Can not flush",
            ));
        }
        Ok(())
    }
}
impl std::ops::Drop for Writer {
    /// Closes the underlying handle when the writer goes away.
    ///
    /// The status returned by `bgzf_close` is discarded, since `drop` has no
    /// way to report it. Presumably closing also flushes buffered output (the
    /// tests read files back right after the writer leaves scope) — confirm
    /// against bgzf.h.
    fn drop(&mut self) {
        let handle = self.inner;
        unsafe {
            htslib::bgzf_close(handle);
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Read;
    use std::io::Write;

    // Fixture files holding the same VCF text as plain text, gzip and bgzip.
    const FN_PLAIN: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/test/bgzip/plain.vcf");
    const FN_GZIP: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/test/bgzip/gzip.vcf.gz");
    const FN_BGZIP: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/test/bgzip/bgzip.vcf.gz");
    // Expected text after reading any of the fixtures.
    const CONTENT: &str = include_str!("../../test/bgzip/plain.vcf");
    // Number of bytes every reader test expects to get back.
    const EXPECTED_LEN: usize = 190;

    /// Creates the scratch directory used by the writer tests.
    fn scratch_dir() -> tempfile::TempDir {
        tempfile::Builder::new()
            .prefix("rust-htslib")
            .tempdir()
            .expect("Cannot create temp dir")
    }

    /// Opens `path` with a `Reader`, failing the test with `open_msg` otherwise.
    fn open_fixture(path: &str, open_msg: &str) -> Reader {
        let opened = Reader::from_path(path);
        assert!(opened.is_ok(), "{}", open_msg);
        opened.unwrap()
    }

    /// Drains `reader` and checks success, byte count and text, in that order.
    fn assert_reads_content(reader: &mut Reader, ok_msg: &str, size_msg: &str, content_msg: &str) {
        let mut text = String::new();
        let outcome = reader.read_to_string(&mut text);
        assert!(outcome.is_ok(), "{}", ok_msg);
        assert_eq!(outcome.unwrap(), EXPECTED_LEN, "{}", size_msg);
        assert_eq!(text, CONTENT, "{}", content_msg);
    }

    /// Creates the five-thread pool used by the thread-pool tests.
    fn new_pool() -> ThreadPool {
        let made = ThreadPool::new(5);
        assert!(made.is_ok(), "Creating thread pool");
        made.unwrap()
    }

    /// Checks that opening a writer worked and that the output file exists,
    /// then hands back the writer.
    fn checked_writer(
        opened: Result<Writer, Error>,
        out_path: &std::path::Path,
        create_msg: &str,
        exists_msg: &str,
    ) -> Writer {
        if let Err(ref e) = opened {
            println!("w_result is {}", e);
        }
        assert!(opened.is_ok(), "{}", create_msg);
        assert!(out_path.exists(), "{}", exists_msg);
        opened.unwrap()
    }

    /// Writes `CONTENT` through `writer`, failing the test with `msg` on error.
    fn write_content(writer: &mut Writer, msg: &str) {
        let outcome = writer.write_all(CONTENT.as_bytes());
        assert!(outcome.is_ok(), "{}", msg);
    }

    /// Reads the whole file at `path` back through a `Reader`.
    fn read_back(path: &std::path::Path) -> String {
        let mut text = String::new();
        Reader::from_path(path)
            .unwrap()
            .read_to_string(&mut text)
            .unwrap();
        text
    }

    #[test]
    fn test_is_bgzip_plain() {
        let plain = is_bgzip(FN_PLAIN).unwrap();
        assert!(!plain, "Plain file not detected as BGZIP");

        let gzip = is_bgzip(FN_GZIP).unwrap();
        assert!(!gzip, "Zip file not detected as BGZIP");

        let bgzip = is_bgzip(FN_BGZIP).unwrap();
        assert!(bgzip, "Bgzip file detected as BGZIP");
    }

    #[test]
    fn test_open_plain() {
        let mut reader = open_fixture(FN_PLAIN, "Open plain file with Bgzip reader");
        assert_reads_content(
            &mut reader,
            "Reading plain file into buffer is ok",
            "Reading plain file into buffer is correct size",
            "Reading plain file with correct content",
        );
    }

    #[test]
    fn test_open_gzip() {
        let mut reader = open_fixture(FN_GZIP, "Open gzip file with Bgzip reader");
        assert_reads_content(
            &mut reader,
            "Reading gzip file into buffer is ok",
            "Reading gzip file into buffer is correct size",
            "Reading gzip file with correct content",
        );
    }

    #[test]
    fn test_open_bgzip() {
        let mut reader = open_fixture(FN_BGZIP, "Open bgzip file with Bgzip reader");
        assert_reads_content(
            &mut reader,
            "Reading bgzip file into buffer is ok",
            "Reading bgzip file into buffer is correct size",
            "Reading bgzip file with correct content",
        );
    }

    #[test]
    fn test_set_threadpool() {
        let mut r = open_fixture(FN_BGZIP, "Open bgzip file with Bgzip reader");
        let tpool = new_pool();
        let set_result = r.set_thread_pool(&tpool);
        assert_eq!(set_result, Ok(()), "Setting thread pool okay");
        assert_reads_content(
            &mut r,
            "Reading bgzip file into buffer is ok - using a threadpool",
            "Reading bgzip file into buffer is correct size using a threadpool",
            "Reading bgzip file with correct content using a threadpool",
        );
    }

    #[test]
    fn test_write_plain() {
        let tmp = scratch_dir();
        let out_path = tmp.path().join("test.vcf");
        println!("{:?}", out_path);

        // Inner scope: the writer must be dropped before the file is read back.
        {
            let mut w = checked_writer(
                Writer::from_path_with_level(&out_path, CompressionLevel::NoCompression),
                &out_path,
                "Create plain file with Bgzip writer",
                "Plain file is created with Bgzip writer",
            );
            write_content(&mut w, "Plain file can write with Bgzip writer");
        }

        assert!(
            !is_bgzip(&out_path).unwrap(),
            "NoCompression file should not be detected as BGZIP"
        );
        let my_content = std::fs::read_to_string(&out_path).unwrap();
        assert_eq!(
            my_content, CONTENT,
            "Writing bgzip file with no compression"
        );

        tmp.close().expect("Failed to delete temp dir");
    }

    #[test]
    fn test_write_default() {
        let tmp = scratch_dir();
        let out_path = tmp.path().join("test.vcf.bgzf");
        println!("{:?}", out_path);

        // Inner scope: the writer must be dropped before the file is read back.
        {
            let mut w = checked_writer(
                Writer::from_path(&out_path),
                &out_path,
                "Create bgzip file with Bgzip writer",
                "Bgzip file is created with Bgzip writer",
            );
            write_content(&mut w, "Bgzip file can write with Bgzip writer");
        }

        let my_content = read_back(&out_path);
        assert_eq!(
            my_content, CONTENT,
            "Writing bgzip file with default compression"
        );
        assert!(
            is_bgzip(&out_path).unwrap(),
            "Default BGZIP file detected as BGZIP"
        );

        tmp.close().expect("Failed to delete temp dir");
    }

    #[test]
    fn test_write_compression_levels() {
        let tmp = scratch_dir();
        let out_path = tmp.path().join("test.vcf.bgzf");

        // Named presets first, then every explicit level from -1 through 9.
        let named = vec![
            CompressionLevel::Fastest,
            CompressionLevel::Maximum,
            CompressionLevel::Uncompressed,
        ];
        let numbered = (-1..=9_i8).map(CompressionLevel::Level);

        for level in named.into_iter().chain(numbered) {
            // Inner scope: the writer must be dropped before the file is read back.
            {
                let mut w = checked_writer(
                    Writer::from_path_with_level(&out_path, level),
                    &out_path,
                    "Create bgzip file with Bgzip writer",
                    "Bgzip file is created with Bgzip writer",
                );
                write_content(&mut w, "Bgzip file can write with Bgzip writer");
            }

            let my_content = read_back(&out_path);
            assert_eq!(
                my_content, CONTENT,
                "Writing bgzip file with {:?} compression",
                level
            );
            assert!(
                is_bgzip(&out_path).unwrap(),
                "Writing BGZIP file with {:?} compression detected as BGZIP",
                level
            );
        }

        tmp.close().expect("Failed to delete temp dir");
    }

    #[test]
    fn test_write_with_threadpool() {
        let tmp = scratch_dir();
        let out_path = tmp.path().join("test.vcf.bgzf");
        println!("{:?}", out_path);

        // Inner scope: the writer (and pool) must be dropped before the file
        // is read back.
        {
            let mut w = checked_writer(
                Writer::from_path(&out_path),
                &out_path,
                "Create bgzip file with Bgzip threadpool",
                "Bgzip file is created with Bgzip threadpool",
            );
            let tpool = new_pool();
            let set_tpool_result = w.set_thread_pool(&tpool);
            assert!(set_tpool_result.is_ok(), "Setting thread pool");
            write_content(&mut w, "Bgzip file can write with Bgzip threadpool");
        }

        let my_content = read_back(&out_path);
        assert_eq!(my_content, CONTENT, "Writing bgzip file with threadpool");
        assert!(
            is_bgzip(&out_path).unwrap(),
            "Threadpool BGZIP file detected as BGZIP"
        );

        tmp.close().expect("Failed to delete temp dir");
    }
}