1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
#![warn(missing_docs)]
//! **noodles-bgzf** handles the reading and writing of the blocked gzip format (BGZF).
//!
//! While the gzip format is typically a single stream, a BGZF is the concatenation of many gzip
//! streams. Each stream is called a block, with its uncompressed data size being constrained to
//! at most 64 KiB. This multistream gzip allows random access using [`virtual positions`].
//!
//! noodles-bgzf abstracts away the concept of blocks, implementing [`std::io::Read`] for the
//! reader and [`std::io::Write`] for the writer.
//!
//! [`virtual positions`]: VirtualPosition
//!
//! # Examples
//!
//! ## Read an entire BGZF file
//!
//! ```no_run
//! # use std::{fs::File, io::{self, Read}};
//! use noodles_bgzf as bgzf;
//! let mut reader = File::open("data.gz").map(bgzf::Reader::new)?;
//! let mut data = Vec::new();
//! reader.read_to_end(&mut data)?;
//! # Ok::<(), io::Error>(())
//! ```
//!
//! ## Write a BGZF file
//!
//! ```no_run
//! # use std::{fs::File, io::{self, Write}};
//! use noodles_bgzf as bgzf;
//! let mut writer = File::create("data.gz").map(bgzf::Writer::new)?;
//! writer.write_all(b"noodles-bgzf")?;
//! # Ok::<(), io::Error>(())
//! ```
#[cfg(feature = "async")]
pub mod r#async;
mod block;
pub(crate) mod deflate;
mod gz;
pub mod gzi;
pub mod indexed_reader;
pub mod io;
mod multithreaded_reader;
pub mod multithreaded_writer;
pub mod reader;
pub mod virtual_position;
pub mod writer;
pub use self::{
indexed_reader::IndexedReader, multithreaded_reader::MultithreadedReader,
multithreaded_writer::MultithreadedWriter, reader::Reader, virtual_position::VirtualPosition,
writer::Writer,
};
#[cfg(feature = "async")]
pub use self::r#async::{Reader as AsyncReader, Writer as AsyncWriter};
use self::block::Block;
// Size, in bytes, of the gzip extra-length field: XLEN (2)
const GZIP_XLEN_SIZE: usize = 2;
// Size, in bytes, of the BGZF extra subfield: SI1 (1) + SI2 (1) + SLEN (2) + BSIZE (2)
const BGZF_XLEN: usize = 6;
// Upper bound on `ISIZE` for a BGZF block (1 << 16 = 65536).
//
// § 4.1 The BGZF compression format (2021-06-03): "Thus while `ISIZE` is stored as a `uint32_t` as
// per the gzip format, in BGZF it is limited to the range [0, 65536]."
const BGZF_MAX_ISIZE: usize = 1 << 16;
// Total size of a BGZF block header: the sum of the gzip header size (`gz::HEADER_SIZE`, defined
// in the `gz` module), the XLEN field, and the BGZF extra subfield.
pub(crate) const BGZF_HEADER_SIZE: usize = gz::HEADER_SIZE + GZIP_XLEN_SIZE + BGZF_XLEN;
#[cfg(test)]
mod tests {
    use std::io::{self, BufRead, Cursor, Read, Write};

    use super::*;

    // Round-trips a payload through `Writer` and `Reader`. The payload is written in three
    // pieces, with an explicit flush after each of the first two.
    #[test]
    fn test_self() -> io::Result<()> {
        let mut writer = Writer::new(Vec::new());

        for chunk in [&b"noodles"[..], &b"-"[..]] {
            writer.write_all(chunk)?;
            writer.flush()?;
        }

        writer.write_all(b"bgzf")?;
        let compressed = writer.finish()?;

        let mut reader = Reader::new(compressed.as_slice());
        let mut decoded = Vec::new();
        reader.read_to_end(&mut decoded)?;

        assert_eq!(decoded, b"noodles-bgzf");

        Ok(())
    }

    // Reads a payload back line by line and checks both the lines and the virtual position the
    // reader reported immediately before each line was read.
    #[test]
    fn test_self_buffered() -> io::Result<()> {
        let mut writer = Writer::new(Vec::new());
        writer.write_all(b"noodles\n-\nbgzf\nbuffered")?;
        let compressed = writer.finish()?;

        let mut reader = Reader::new(compressed.as_slice());
        let mut lines = Vec::new();
        let mut virtual_positions = Vec::new();

        loop {
            // Sample the position first; it is only kept if a line actually follows.
            let line_start = reader.virtual_position();

            let mut line = String::new();

            if reader.read_line(&mut line)? == 0 {
                break;
            }

            virtual_positions.push(line_start);
            lines.push(line);
        }

        let expected_lines = ["noodles\n", "-\n", "bgzf\n", "buffered"];
        assert_eq!(lines, expected_lines);

        // Each expected position pairs 0 with the offset at which the line starts in the
        // payload (presumably (compressed offset, uncompressed offset) -- confirm against
        // `VirtualPosition`).
        let line_offsets = [0, 8, 10, 15];
        let expected_virtual_positions: Vec<VirtualPosition> = line_offsets
            .iter()
            .map(|&upos| VirtualPosition::try_from((0, upos)).unwrap())
            .collect();

        assert_eq!(virtual_positions, expected_virtual_positions);

        Ok(())
    }

    // Same round trip as `test_self`, but through the multithreaded writer and reader.
    #[test]
    fn test_self_multithreaded() -> io::Result<()> {
        let mut writer = MultithreadedWriter::new(Vec::new());

        for chunk in [&b"noodles"[..], &b"-"[..]] {
            writer.write_all(chunk)?;
            writer.flush()?;
        }

        writer.write_all(b"bgzf")?;
        let compressed = writer.finish()?;

        // The multithreaded reader is handed the compressed bytes wrapped in a `Cursor`.
        let mut reader = MultithreadedReader::new(Cursor::new(compressed));
        let mut decoded = Vec::new();
        reader.read_to_end(&mut decoded)?;

        assert_eq!(decoded, b"noodles-bgzf");

        Ok(())
    }
}