// gix_pack/multi_index/chunk.rs
pub mod index_names {
use std::path::{Path, PathBuf};
use gix_object::bstr::{BString, ByteSlice};
/// The four-byte identifier (`PNAM`) of the chunk that stores the pack index names.
pub const ID: gix_chunk::Id = *b"PNAM";
/// Types used when decoding the chunk with `from_bytes()` of the parent module.
pub mod decode {
use gix_object::bstr::BString;
/// The error returned by `from_bytes()` of the parent module.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
/// A path did not compare strictly greater than the path stored right before it.
#[error("The pack names were not ordered alphabetically.")]
NotOrderedAlphabetically,
/// No null byte was found in the remaining chunk data while a path was still expected.
#[error("Each pack path name must be terminated with a null byte")]
MissingNullByte,
/// The bytes of a path could not be converted into a path for the current platform.
#[error("Couldn't turn path '{path}' into OS path due to encoding issues")]
PathEncoding { path: BString },
/// After reading all expected paths, bytes other than null bytes remained in the chunk.
#[error("non-padding bytes found after all paths were read.")]
UnknownTrailerBytes,
}
}
pub fn from_bytes(mut chunk: &[u8], num_packs: u32) -> Result<Vec<PathBuf>, decode::Error> {
let mut out = Vec::new();
for _ in 0..num_packs {
let null_byte_pos = chunk.find_byte(b'\0').ok_or(decode::Error::MissingNullByte)?;
let path = &chunk[..null_byte_pos];
let path = gix_path::try_from_byte_slice(path)
.map_err(|_| decode::Error::PathEncoding {
path: BString::from(path),
})?
.to_owned();
if let Some(previous) = out.last() {
if previous >= &path {
return Err(decode::Error::NotOrderedAlphabetically);
}
}
out.push(path);
chunk = &chunk[null_byte_pos + 1..];
}
if !chunk.is_empty() && !chunk.iter().all(|b| *b == 0) {
return Err(decode::Error::UnknownTrailerBytes);
}
Ok(out)
}
/// Return the number of bytes needed to store `paths` in this chunk.
///
/// Every path occupies its length in bytes plus one terminating null byte, and the sum is
/// rounded up to the next multiple of `CHUNK_ALIGNMENT`.
///
/// # Panics
///
/// If a path is not valid UTF-8 or contains non-ASCII characters.
pub fn storage_size(paths: impl IntoIterator<Item = impl AsRef<Path>>) -> u64 {
    let unpadded: u64 = paths
        .into_iter()
        .map(|path| {
            let ascii_path = path.as_ref().to_str().expect("UTF-8 compatible paths");
            assert!(
                ascii_path.is_ascii(),
                "must use ascii bytes for correct size computation"
            );
            // One extra byte for the null terminator.
            (ascii_path.len() + 1) as u64
        })
        .sum();

    match unpadded % CHUNK_ALIGNMENT {
        0 => unpadded,
        remainder => unpadded + (CHUNK_ALIGNMENT - remainder),
    }
}
/// Write each of `paths` to `out`, every one followed by a null byte, then pad the output with
/// null bytes until the total amount of written bytes is a multiple of `CHUNK_ALIGNMENT`.
///
/// # Errors
///
/// Any error produced by `out` is returned as is.
///
/// # Panics
///
/// If a path is not valid UTF-8.
pub fn write(
    paths: impl IntoIterator<Item = impl AsRef<Path>>,
    out: &mut dyn std::io::Write,
) -> std::io::Result<()> {
    const PADDING: [u8; CHUNK_ALIGNMENT as usize] = [0; CHUNK_ALIGNMENT as usize];

    let mut total_len = 0u64;
    for path in paths {
        let name = path.as_ref().to_str().expect("UTF-8 path").as_bytes();
        out.write_all(name)?;
        out.write_all(b"\0")?;
        total_len += name.len() as u64 + 1;
    }

    // Bytes written past the last aligned position; zero means no padding is required.
    let trailing = (total_len % CHUNK_ALIGNMENT) as usize;
    if trailing != 0 {
        out.write_all(&PADDING[..CHUNK_ALIGNMENT as usize - trailing])?;
    }
    Ok(())
}
/// The chunk size is rounded up to a multiple of this many bytes, using null bytes as padding.
const CHUNK_ALIGNMENT: u64 = 4;
}
pub mod fanout {
use crate::multi_index;
/// The size of the fan-out chunk in bytes: 256 entries of 4 bytes each.
pub const SIZE: usize = 4 * 256;
/// The four-byte identifier (`OIDF`) of the fan-out chunk.
pub const ID: gix_chunk::Id = *b"OIDF";
pub fn from_bytes(chunk: &[u8]) -> Option<[u32; 256]> {
if chunk.len() != SIZE {
return None;
}
let mut out = [0; 256];
for (c, f) in chunk.chunks_exact(4).zip(out.iter_mut()) {
*f = u32::from_be_bytes(c.try_into().unwrap());
}
out.into()
}
/// Compute the fan-out table from the first byte of the id of each of `sorted_entries` and write
/// every value of the table to `out` in big-endian byte order.
///
/// # Errors
///
/// Any error produced by `out` is returned as is.
pub(crate) fn write(
    sorted_entries: &[multi_index::write::Entry],
    out: &mut dyn std::io::Write,
) -> std::io::Result<()> {
    let mut first_bytes = sorted_entries.iter().map(|entry| entry.id.first_byte());
    let fanout = crate::index::encode::fanout(&mut first_bytes);
    fanout
        .iter()
        .try_for_each(|value| out.write_all(&value.to_be_bytes()))
}
}
pub mod lookup {
use std::ops::Range;
use crate::multi_index;
/// The four-byte identifier (`OIDL`) of the chunk that stores the object ids for lookup.
pub const ID: gix_chunk::Id = *b"OIDL";
/// Return the number of bytes needed to store the ids of `entries` objects, each of which takes
/// as many bytes as a hash of kind `object_hash`.
pub fn storage_size(entries: usize, object_hash: gix_hash::Kind) -> u64 {
    let hash_len = object_hash.len_in_bytes();
    let total = entries * hash_len;
    total as u64
}
/// Write the id of each of `sorted_entries` to `out`, in the order given.
///
/// # Errors
///
/// Any error produced by `out` is returned as is.
pub(crate) fn write(
    sorted_entries: &[multi_index::write::Entry],
    out: &mut dyn std::io::Write,
) -> std::io::Result<()> {
    sorted_entries
        .iter()
        .try_for_each(|entry| out.write_all(entry.id.as_slice()))
}
/// Return `true` if the chunk spanning `offset` holds `num_objects` ids of the given `hash` kind.
///
/// The length of the chunk is divided by the hash length using integer division, so a trailing
/// partial id does not affect the result.
pub fn is_valid(offset: &Range<usize>, hash: gix_hash::Kind, num_objects: u32) -> bool {
    let chunk_len = offset.end - offset.start;
    let stored_ids = chunk_len / hash.len_in_bytes();
    stored_ids == num_objects as usize
}
}
pub mod offsets {
use std::ops::Range;
use crate::multi_index;
/// The four-byte identifier (`OOFF`) of the chunk that stores pack indices and pack offsets per object.
pub const ID: gix_chunk::Id = *b"OOFF";
/// Return the number of bytes needed to store `entries` entries in this chunk.
pub fn storage_size(entries: usize) -> u64 {
    // Two 4-byte fields per entry, matching what `write()` emits: the pack index, then the offset field.
    const ENTRY_SIZE: usize = 4 + 4;
    let total = entries * ENTRY_SIZE;
    total as u64
}
/// Write one record per entry of `sorted_entries` to `out`: the pack index in big-endian byte
/// order, followed by a big-endian 32-bit offset field.
///
/// If `large_offsets_needed` is `true`, pack offsets greater than `LARGE_OFFSET_THRESHOLD` are not
/// stored directly. Instead the field holds a counter with `HIGH_BIT` set, which starts at zero
/// and is incremented for each such entry. Otherwise the pack offset itself is stored.
///
/// # Errors
///
/// Any error produced by `out` is returned as is.
///
/// # Panics
///
/// If `large_offsets_needed` is `false` and a pack offset does not fit into 32 bits.
pub(crate) fn write(
    sorted_entries: &[multi_index::write::Entry],
    large_offsets_needed: bool,
    out: &mut dyn std::io::Write,
) -> std::io::Result<()> {
    use crate::index::encode::{HIGH_BIT, LARGE_OFFSET_THRESHOLD};

    let mut next_large_offset_index = 0u32;
    for entry in sorted_entries {
        out.write_all(&entry.pack_index.to_be_bytes())?;

        let is_large = entry.pack_offset > LARGE_OFFSET_THRESHOLD;
        let field: u32 = match (large_offsets_needed, is_large) {
            (true, true) => {
                let indirect = next_large_offset_index | HIGH_BIT;
                next_large_offset_index += 1;
                indirect
            }
            (true, false) => entry.pack_offset as u32,
            (false, _) => entry
                .pack_offset
                .try_into()
                .expect("without large offsets, pack-offset fits u32"),
        };
        out.write_all(&field.to_be_bytes())?;
    }
    Ok(())
}
/// Return `true` if the chunk spanning `offset` has exactly the size needed to store
/// `num_objects` entries of 8 bytes each, see `storage_size()`.
///
/// A chunk for zero objects is valid only if it is empty. Ranges whose end lies before their
/// start, and sizes that would overflow `usize`, are reported as invalid instead of panicking.
pub fn is_valid(offset: &Range<usize>, num_objects: u32) -> bool {
    // Two 4-byte fields per entry: the pack index followed by the offset field.
    const ENTRY_SIZE: usize = 4 + 4;
    match offset.end.checked_sub(offset.start) {
        // Multiply instead of dividing by `num_objects`: that avoids a division by zero for
        // empty indices and does not silently accept stray trailing bytes.
        Some(chunk_len) => (num_objects as usize).checked_mul(ENTRY_SIZE) == Some(chunk_len),
        None => false,
    }
}
}
pub mod large_offsets {
use std::ops::Range;
use crate::{index::encode::LARGE_OFFSET_THRESHOLD, multi_index};
/// The four-byte identifier (`LOFF`) of the chunk that stores large pack offsets.
pub const ID: gix_chunk::Id = *b"LOFF";
/// Return the number of `entries` whose pack offset is greater than `LARGE_OFFSET_THRESHOLD`, or
/// `None` if no pack offset is greater than `u32::MAX`.
pub(crate) fn num_large_offsets(entries: &[multi_index::write::Entry]) -> Option<usize> {
    let (above_threshold, exceeds_u32) = entries.iter().fold((0usize, false), |(count, needed), entry| {
        (
            count + usize::from(entry.pack_offset > LARGE_OFFSET_THRESHOLD),
            needed || entry.pack_offset > crate::data::Offset::from(u32::MAX),
        )
    });
    // Offsets above the threshold are only counted for the caller if at least one offset
    // cannot be represented with 32 bits at all.
    exceeds_u32.then_some(above_threshold)
}
/// Return `true` if the length of the chunk spanning `offset` is a multiple of 8 bytes.
pub fn is_valid(offset: &Range<usize>) -> bool {
    const ENTRY_SIZE: usize = 8;
    let chunk_len = offset.end - offset.start;
    chunk_len % ENTRY_SIZE == 0
}
/// Write the pack offset of every entry of `sorted_entries` that is greater than
/// `LARGE_OFFSET_THRESHOLD` to `out` in big-endian byte order, keeping the order of the entries.
///
/// `num_large_offsets` is the amount of offsets the caller expects to be written.
///
/// # Errors
///
/// Any error produced by `out` is returned as is.
///
/// # Panics
///
/// If the amount of written offsets differs from `num_large_offsets`.
pub(crate) fn write(
    sorted_entries: &[multi_index::write::Entry],
    mut num_large_offsets: usize,
    out: &mut dyn std::io::Write,
) -> std::io::Result<()> {
    let large_offsets = sorted_entries
        .iter()
        .map(|entry| entry.pack_offset)
        .filter(|pack_offset| *pack_offset > LARGE_OFFSET_THRESHOLD);

    for pack_offset in large_offsets {
        out.write_all(&pack_offset.to_be_bytes())?;
        // Count down to validate the expectation of the caller.
        num_large_offsets = num_large_offsets
            .checked_sub(1)
            .expect("BUG: wrote more offsets the previously found");
    }
    assert_eq!(num_large_offsets, 0, "BUG: wrote less offsets than initially counted");
    Ok(())
}
/// Return the number of bytes needed to store `large_offsets` offsets of 8 bytes each.
pub(crate) fn storage_size(large_offsets: usize) -> u64 {
    const ENTRY_SIZE: u64 = 8;
    (large_offsets as u64) * ENTRY_SIZE
}
}