gix_hash/oid.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
use std::hash;
use crate::{Kind, ObjectId, SIZE_OF_SHA1_DIGEST};
/// A borrowed, unsized view onto the raw bytes of a hash that identifies an object.
///
/// # Future Proofing
///
/// Should multiple hash kinds of identical length ever need support, the slice length alone
/// can no longer be used to tell them apart. To make that work, the high bits of the length of
/// the internal `bytes` slice (a fat pointer, made of a data pointer and a length in bytes)
/// will carry additional information. An adjusted slice will then be constructed before the
/// bytes are accessed or returned, while the high bits help to resolve the
/// hash [`kind()`][oid::kind()].
/// Quite a few bits are expected to be available for such 'conflict resolution', as most hashes
/// aren't longer than 64 bytes.
#[derive(PartialEq, Eq, PartialOrd, Ord)]
#[repr(transparent)]
#[allow(non_camel_case_types)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct oid {
    /// The raw digest bytes; equality and ordering of `oid` are derived from this field alone.
    bytes: [u8],
}
// The lint silenced below is a false positive:
// An automatically derived `Hash` implementation for `oid` would attempt to hash
// the length of the slice first. On 32 bit systems this can lead to issues with
// the custom `gix_hashtable` `Hasher` implementation, where the length currently
// ends up being discarded anyway.
#[allow(clippy::derived_hash_with_manual_eq)]
impl hash::Hash for oid {
    /// Feed only the raw digest bytes into `state`, without any length prefix.
    fn hash<H>(&self, state: &mut H)
    where
        H: hash::Hasher,
    {
        hash::Hasher::write(state, self.as_bytes());
    }
}
/// A helper that renders an [`oid`] as hexadecimal text using a given amount of characters.
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct HexDisplay<'a> {
    /// The object id whose bytes are rendered.
    inner: &'a oid,
    /// The requested amount of hexadecimal characters; capped to the full hex length when displayed.
    hex_len: usize,
}
impl std::fmt::Display for HexDisplay<'_> {
    /// Write the hexadecimal form of the wrapped id, cut off after at most `hex_len` characters.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut buf = Kind::hex_buf();
        // The amount of hex characters actually available for this id.
        let written = self.inner.hex_to_buf(buf.as_mut());
        // Never show more than what was encoded, even if more was requested.
        let shown = std::cmp::min(self.hex_len, written);
        let text = std::str::from_utf8(&buf[..shown]).expect("ascii only in hex");
        f.write_str(text)
    }
}
impl std::fmt::Debug for oid {
    /// Format as `<Kind>(<full hex>)`, e.g. `Sha1(…)`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Exhaustive on purpose so that a new hash kind forces an update here.
        let kind_name = match self.kind() {
            Kind::Sha1 => "Sha1",
        };
        write!(f, "{kind_name}({})", self.to_hex())
    }
}
/// The error returned when trying to convert a byte slice to an [`oid`] or [`ObjectId`]
#[allow(missing_docs)]
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("Cannot instantiate git hash from a digest of length {0}")]
InvalidByteSliceLength(usize),
}
/// Conversion
impl oid {
/// Try to create a shared object id from a slice of bytes representing a hash `digest`
#[inline]
pub fn try_from_bytes(digest: &[u8]) -> Result<&Self, Error> {
match digest.len() {
20 => Ok(
#[allow(unsafe_code)]
unsafe {
&*(digest as *const [u8] as *const oid)
},
),
len => Err(Error::InvalidByteSliceLength(len)),
}
}
/// Create an OID from the input `value` slice without performing any safety check.
/// Use only once sure that `value` is a hash of valid length.
pub fn from_bytes_unchecked(value: &[u8]) -> &Self {
Self::from_bytes(value)
}
/// Only from code that statically assures correct sizes using array conversions.
pub(crate) fn from_bytes(value: &[u8]) -> &Self {
#[allow(unsafe_code)]
unsafe {
&*(value as *const [u8] as *const oid)
}
}
}
/// Access
impl oid {
    /// Determine the kind of hash of this instance, derived from the amount of bytes it holds.
    #[inline]
    pub fn kind(&self) -> Kind {
        let num_bytes = self.bytes.len();
        Kind::from_len_in_bytes(num_bytes)
    }

    /// Return the leading byte of the hash, commonly used to partition a set of object ids.
    #[inline]
    pub fn first_byte(&self) -> u8 {
        self.as_bytes()[0]
    }

    /// View this object id as a slice of raw bytes.
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        &self.bytes
    }

    /// Return a type which can display itself in hexadecimal form with the `len` amount of characters.
    #[inline]
    pub fn to_hex_with_len(&self, len: usize) -> HexDisplay<'_> {
        HexDisplay { inner: self, hex_len: len }
    }

    /// Return a type which displays this oid as hex in full, i.e. with two characters per byte.
    #[inline]
    pub fn to_hex(&self) -> HexDisplay<'_> {
        self.to_hex_with_len(self.bytes.len() * 2)
    }

    /// Returns `true` if this hash consists of all null bytes.
    #[inline]
    #[doc(alias = "is_zero", alias = "git2")]
    pub fn is_null(&self) -> bool {
        // Exhaustive on purpose: each hash kind compares against its own null id.
        match self.kind() {
            Kind::Sha1 => self.as_bytes() == Self::null_sha1().as_bytes(),
        }
    }
}
/// Sha1 specific methods
impl oid {
    /// Encode this id in hexadecimal notation into the start of `buf` and return the amount of
    /// bytes written, which is twice the amount of bytes in this id.
    ///
    /// **Panics** if the buffer isn't big enough to hold twice as many bytes as the current binary size.
    #[inline]
    #[must_use]
    pub fn hex_to_buf(&self, buf: &mut [u8]) -> usize {
        let hex_len = self.bytes.len() * 2;
        // Slicing is what panics if `buf` is too short.
        let target = &mut buf[..hex_len];
        faster_hex::hex_encode(&self.bytes, target).expect("to count correctly");
        hex_len
    }

    /// Write this id to `out` in hexadecimal notation.
    #[inline]
    pub fn write_hex_to(&self, out: &mut dyn std::io::Write) -> std::io::Result<()> {
        let mut buf = Kind::hex_buf();
        let written = self.hex_to_buf(&mut buf);
        out.write_all(&buf[..written])
    }

    /// Returns a Sha1 digest with all bytes being initialized to zero.
    #[inline]
    pub(crate) fn null_sha1() -> &'static Self {
        // The borrowed array literal is a constant expression and thus lives for `'static`.
        Self::from_bytes(&[0u8; SIZE_OF_SHA1_DIGEST])
    }
}
impl AsRef<oid> for &oid {
    /// Hand out the `oid` that this reference points to.
    fn as_ref(&self) -> &oid {
        *self
    }
}
impl<'a> TryFrom<&'a [u8]> for &'a oid {
type Error = Error;
fn try_from(value: &'a [u8]) -> Result<Self, Self::Error> {
oid::try_from_bytes(value)
}
}
impl ToOwned for oid {
    type Owned = ObjectId;

    /// Copy the borrowed bytes into an owned [`ObjectId`] of the same hash kind.
    fn to_owned(&self) -> ObjectId {
        match self.kind() {
            Kind::Sha1 => {
                // `kind()` established the hash kind, so the array conversion is expected to succeed.
                let digest = self.bytes.try_into().expect("no bug in hash detection");
                ObjectId::Sha1(digest)
            }
        }
    }
}
impl<'a> From<&'a [u8; SIZE_OF_SHA1_DIGEST]> for &'a oid {
    /// Borrow a Sha1-sized array as an `oid`; the array type statically assures the length.
    fn from(digest: &'a [u8; SIZE_OF_SHA1_DIGEST]) -> Self {
        oid::from_bytes(&digest[..])
    }
}
impl std::fmt::Display for &oid {
    /// Write every byte as two lowercase hexadecimal characters, stopping at the first write error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.as_bytes()
            .iter()
            .try_for_each(|byte| write!(f, "{byte:02x}"))
    }
}
impl PartialEq<ObjectId> for &oid {
    /// Compare with an owned object id by viewing it as a borrowed `oid` first.
    fn eq(&self, other: &ObjectId) -> bool {
        let rhs: &oid = other.as_ref();
        *self == rhs
    }
}
/// Manually created from a version that uses a slice, and we forcefully try to convert it into a borrowed array of the desired size
/// Could be improved by fitting this into serde.
/// Unfortunately the `serde::Deserialize` derive wouldn't work for borrowed arrays.
///
/// Input whose byte length isn't accepted by [`oid::try_from_bytes()`] is reported as a
/// deserialization error instead of causing a panic, as such data can originate from
/// untrusted or corrupted sources.
#[cfg(feature = "serde")]
impl<'de: 'a, 'a> serde::Deserialize<'de> for &'a oid {
    fn deserialize<D>(deserializer: D) -> Result<Self, <D as serde::Deserializer<'de>>::Error>
    where
        D: serde::Deserializer<'de>,
    {
        /// Visitor yielding an `oid` that borrows its bytes from the deserializer input.
        struct __Visitor<'de: 'a, 'a> {
            marker: std::marker::PhantomData<&'a oid>,
            lifetime: std::marker::PhantomData<&'de ()>,
        }
        impl<'de: 'a, 'a> serde::de::Visitor<'de> for __Visitor<'de, 'a> {
            type Value = &'a oid;

            fn expecting(&self, __formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                std::fmt::Formatter::write_str(__formatter, "tuple struct Digest")
            }

            #[inline]
            fn visit_newtype_struct<__E>(self, __e: __E) -> std::result::Result<Self::Value, __E::Error>
            where
                __E: serde::Deserializer<'de>,
            {
                let __field0 = <&'a [u8] as serde::Deserialize>::deserialize(__e)?;
                // A digest of unexpected length is invalid input, not a bug, hence no panic.
                oid::try_from_bytes(__field0).map_err(serde::de::Error::custom)
            }

            #[inline]
            fn visit_seq<__A>(self, mut __seq: __A) -> std::result::Result<Self::Value, __A::Error>
            where
                __A: serde::de::SeqAccess<'de>,
            {
                let __field0 = match serde::de::SeqAccess::next_element::<&'a [u8]>(&mut __seq)? {
                    Some(__value) => __value,
                    None => {
                        return Err(serde::de::Error::invalid_length(
                            0usize,
                            &"tuple struct Digest with 1 element",
                        ));
                    }
                };
                // A digest of unexpected length is invalid input, not a bug, hence no panic.
                oid::try_from_bytes(__field0).map_err(serde::de::Error::custom)
            }
        }
        serde::Deserializer::deserialize_newtype_struct(
            deserializer,
            "Digest",
            __Visitor {
                marker: std::marker::PhantomData::<&'a oid>,
                lifetime: std::marker::PhantomData,
            },
        )
    }
}