1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
//! This crate is a Base32 encoder/decoder library.
//!
//! The library is intended to be used to implement an Onion address encoder/decoder.
//! The library uses the RFC 4648 Base32 alphabet, but the encoded string is lowercase by default.
//! The library does not support padding.
//!
//! # Examples
//!
//! ```rust
//! use koibumi_base32 as base32;
//!
//! let test = base32::encode(b"hello");
//! let expected = "nbswy3dp";
//! assert_eq!(test, expected);
//! ```
//!
//! ```rust
//! use koibumi_base32 as base32;
//!
//! let test = base32::decode("nbswy3dp")?;
//! let expected = b"hello";
//! assert_eq!(test, expected);
//! # Ok::<(), Box<dyn std::error::Error>>(())
//! ```
#![deny(unsafe_code)]
#![warn(missing_docs)]
#[macro_use]
extern crate lazy_static;
use std::fmt;
/// Base32 symbols in value order: the RFC 4648 alphabet, lowercased.
const ALPHABET: &[u8] = b"abcdefghijklmnopqrstuvwxyz234567";

/// Sentinel stored in `ALPHABET_INDEX` for bytes that are not Base32 symbols.
///
/// It equals the alphabet length (32), one more than the largest valid value.
const INVALID: u8 = ALPHABET.len() as u8;

/// Reverse lookup table: maps a byte to its 5-bit symbol value, or to `INVALID`.
///
/// The table is evaluated at compile time, so a lookup is a plain array
/// index with no lazy initialisation or synchronisation at run time.
static ALPHABET_INDEX: [u8; 0x100] = {
    let mut index = [INVALID; 0x100];
    // `for` loops are not available in constant evaluation, hence `while`.
    let mut i = 0;
    while i < ALPHABET.len() {
        index[ALPHABET[i] as usize] = i as u8;
        i += 1;
    }
    index
};
/// Encodes byte array into Base32 string.
///
/// The input is arbitrary `[u8]` slice
/// and the output is lowercase `String`.
/// Using lowercase RFC4648 alphabet and can be used for Onion addresses.
/// Padding is not supported.
///
/// # Examples
///
/// ```rust
/// use koibumi_base32 as base32;
///
/// let test = base32::encode(b"hello");
/// let expected = "nbswy3dp";
/// assert_eq!(test, expected);
/// ```
pub fn encode(bytes: impl AsRef<[u8]>) -> String {
let bytes = bytes.as_ref();
let mut encoded = Vec::new();
let mut i = 0;
let mut bit = 0;
while i < bytes.len() {
// 0 1 2 3 4 5 6 7
// |xxxxx xxx|xx xxxxx x|xxxx xxxx|x xxxxx xx|xxx xxxxx|
// 0 1 2 3 4
let c = if bit <= 3 {
bytes[i] >> (3 - bit)
} else if i + 1 < bytes.len() {
bytes[i] << (bit - 3) | bytes[i + 1] >> (11 - bit)
} else {
bytes[i] << (bit - 3)
} & 0x1f;
encoded.push(ALPHABET[c as usize]);
bit += 5;
if bit >= 8 {
i += 1;
bit -= 8;
}
}
String::from_utf8(encoded).unwrap()
}
#[test]
fn test_encode() {
    // RFC 4648 section 10 test vectors, lowercased and with `=` padding removed.
    let check = |input: &[u8], expected: &str| assert_eq!(encode(input), expected);

    check(b"", "");
    check(b"f", "my");
    check(b"fo", "mzxq");
    check(b"foo", "mzxw6");
    check(b"foob", "mzxw6yq");
    check(b"fooba", "mzxw6ytb");
    check(b"foobar", "mzxw6ytboi");
}
/// Error reporting that a character outside the Base32 alphabet was found.
///
/// This error is used as the error type for the [`decode`] function.
///
/// [`decode`]: fn.decode.html
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct InvalidCharacter(char);

impl InvalidCharacter {
    /// Returns the character that was rejected.
    pub fn char(&self) -> char {
        let InvalidCharacter(found) = self;
        *found
    }
}

impl fmt::Display for InvalidCharacter {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let found = self.char();
        let code_point = found as u32;

        // Printable characters are shown literally next to their code point.
        if !found.is_control() {
            return write!(
                f,
                "invalid character '{}' ({:#08x}) found",
                found, code_point
            );
        }

        // Control characters are reported by code point only, so they are
        // never written raw into the message.
        write!(f, "invalid character ({:#08x}) found", code_point)
    }
}

impl std::error::Error for InvalidCharacter {}
/// Maps a single Base32 character to its 5-bit value.
///
/// Returns `InvalidCharacter` carrying `ch` when the character lies outside
/// the 256-entry lookup table or is marked `INVALID` in it.
fn to_num(ch: char) -> Result<u8, InvalidCharacter> {
    ALPHABET_INDEX
        // Code points above 0xff fall outside the table and yield `None`.
        .get(ch as usize)
        .copied()
        .filter(|&value| value != INVALID)
        .ok_or(InvalidCharacter(ch))
}
/// Decodes an unpadded, lowercase Base32 string into bytes.
///
/// The input is a `str` reference written in the lowercase RFC 4648
/// alphabet (`a`-`z`, `2`-`7`), as used for Onion addresses, and the output
/// is a `Vec<u8>`.
/// Padding is not supported. Trailing bits that do not complete a whole
/// byte are discarded.
///
/// # Errors
///
/// Returns `InvalidCharacter` for the first character that is not part of
/// the alphabet. Uppercase letters and the `=` padding character are
/// rejected this way.
///
/// # Examples
///
/// ```rust
/// use koibumi_base32 as base32;
///
/// let test = base32::decode("nbswy3dp")?;
/// let expected = b"hello";
/// assert_eq!(test, expected);
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
pub fn decode(s: impl AsRef<str>) -> Result<Vec<u8>, InvalidCharacter> {
    let s = s.as_ref();

    // Eight symbols decode to five bytes. `len()` counts UTF-8 bytes, which
    // is never less than the number of characters, so this is an upper bound.
    let mut decoded = Vec::with_capacity(s.len() / 8 * 5 + s.len() % 8 * 5 / 8);

    // `buffer` accumulates 5-bit values; its lowest `pending` bits have not
    // been emitted yet. At most 7 + 5 = 12 bits are live, so `u16` suffices.
    let mut buffer: u16 = 0;
    let mut pending: u32 = 0;

    for ch in s.chars() {
        buffer = (buffer << 5) | u16::from(to_num(ch)?);
        pending += 5;
        if pending >= 8 {
            pending -= 8;
            // The cast keeps the low eight bits, i.e. the completed byte.
            decoded.push((buffer >> pending) as u8);
        }
    }

    Ok(decoded)
}
#[test]
fn test_decode() {
    // RFC 4648 section 10 test vectors, lowercased and with `=` padding removed.
    let check = |encoded: &str, expected: &[u8]| {
        assert_eq!(decode(encoded).unwrap(), expected);
    };

    check("", b"");
    check("my", b"f");
    check("mzxq", b"fo");
    check("mzxw6", b"foo");
    check("mzxw6yq", b"foob");
    check("mzxw6ytb", b"fooba");
    check("mzxw6ytboi", b"foobar");
}