gix_chunk/file/write.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
use crate::file::{index::Entry, Index};
mod write_chunk {
use std::collections::VecDeque;
use crate::file::index;
/// A [`Write`][std::io::Write] implementation that validates chunk sizes while allowing the user to know
/// which chunk is to be written next.
pub struct Chunk<W> {
chunks_to_write: VecDeque<index::Entry>,
inner: W,
next_chunk: Option<index::Entry>,
written_bytes: usize,
}
impl<W> Chunk<W>
where
W: std::io::Write,
{
pub(crate) fn new(out: W, chunks: VecDeque<index::Entry>) -> Chunk<W>
where
W: std::io::Write,
{
Chunk {
chunks_to_write: chunks,
inner: out,
next_chunk: None,
written_bytes: 0,
}
}
}
impl<W> std::io::Write for Chunk<W>
where
W: std::io::Write,
{
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
let written = self.inner.write(buf)?;
self.written_bytes += written;
Ok(written)
}
fn flush(&mut self) -> std::io::Result<()> {
self.inner.flush()
}
}
impl<W> Chunk<W> {
/// Return the inner writer - should only be called once there is no more chunk to write.
pub fn into_inner(self) -> W {
self.inner
}
/// Return the next chunk-id to write, if there is one.
pub fn next_chunk(&mut self) -> Option<crate::Id> {
if let Some(entry) = self.next_chunk.take() {
assert_eq!(
entry.offset.end,
self.written_bytes as u64,
"BUG: expected to write {} bytes, but only wrote {} for chunk {:?}",
entry.offset.end,
self.written_bytes,
std::str::from_utf8(&entry.kind)
);
}
self.written_bytes = 0;
self.next_chunk = self.chunks_to_write.pop_front();
self.next_chunk.as_ref().map(|e| e.kind)
}
}
}
pub use write_chunk::Chunk;
/// Writing
impl Index {
/// Create a new index whose sole purpose is to be receiving chunks using [`plan_chunk()`][Index::plan_chunk()] and to be written to
/// an output using [`into_write()`][Index::into_write()]
pub fn for_writing() -> Self {
Index {
will_write: true,
chunks: Vec::new(),
}
}
/// Plan to write a new chunk as part of the index when [`into_write()`][Index::into_write()] is called.
pub fn plan_chunk(&mut self, chunk: crate::Id, exact_size_on_disk: u64) {
assert!(self.will_write, "BUG: create the index with `for_writing()`");
assert!(
!self.chunks.iter().any(|e| e.kind == chunk),
"BUG: must not add chunk of same kind twice: {:?}",
std::str::from_utf8(&chunk)
);
self.chunks.push(Entry {
kind: chunk,
offset: 0..exact_size_on_disk,
});
}
/// Return the total size of all planned chunks thus far.
pub fn planned_storage_size(&self) -> u64 {
assert!(self.will_write, "BUG: create the index with `for_writing()`");
self.chunks.iter().map(|e| e.offset.end).sum()
}
/// Return the amount of chunks we currently know.
pub fn num_chunks(&self) -> usize {
self.chunks.len()
}
/// After [planning all chunks][Index::plan_chunk()] call this method with the destination to write the chunks to.
/// Use the [Chunk] writer to write each chunk in order.
/// `current_offset` is the byte position at which `out` will continue writing.
pub fn into_write<W>(self, mut out: W, current_offset: usize) -> std::io::Result<Chunk<W>>
where
W: std::io::Write,
{
assert!(
self.will_write,
"BUG: create the index with `for_writing()`, cannot write decoded indices"
);
// First chunk starts past the table of contents
let mut current_offset = (current_offset + Self::size_for_entries(self.num_chunks())) as u64;
for entry in &self.chunks {
out.write_all(&entry.kind)?;
out.write_all(¤t_offset.to_be_bytes())?;
current_offset += entry.offset.end;
}
// sentinel to mark end of chunks
out.write_all(&0u32.to_be_bytes())?;
out.write_all(¤t_offset.to_be_bytes())?;
Ok(Chunk::new(out, self.chunks.into()))
}
}