koibumi_base32/
lib.rs

1//! This crate is a Base32 encoder/decoder library.
2//!
3//! The library is intended to be used to implement an Onion address encoder/decoder.
//! The library uses the RFC 4648 Base32 alphabet, but the encoded string is lowercase by default.
5//! The library does not support padding.
6//!
7//! # Examples
8//!
9//! ```rust
10//! use koibumi_base32 as base32;
11//!
12//! let test = base32::encode(b"hello");
13//! let expected = "nbswy3dp";
14//! assert_eq!(test, expected);
15//! ```
16//!
17//! ```rust
18//! use koibumi_base32 as base32;
19//!
20//! let test = base32::decode("nbswy3dp")?;
21//! let expected = b"hello";
22//! assert_eq!(test, expected);
23//! # Ok::<(), Box<dyn std::error::Error>>(())
24//! ```
25
26#![deny(unsafe_code)]
27#![warn(missing_docs)]
28
29#[macro_use]
30extern crate lazy_static;
31
32use std::fmt;
33
/// RFC 4648 Base32 alphabet in lowercase; a symbol's position is its 5-bit value.
const ALPHABET: &[u8] = b"abcdefghijklmnopqrstuvwxyz234567";
/// Marker stored in `ALPHABET_INDEX` for bytes that are not Base32 symbols.
///
/// It is one past the largest symbol value, so it never collides with a valid entry.
const INVALID: u8 = ALPHABET.len() as u8;

/// Builds the byte -> symbol-value reverse lookup table.
///
/// Evaluated at compile time, so the table needs no runtime initialization
/// or synchronization.
const fn build_alphabet_index() -> [u8; 0x100] {
    let mut index = [INVALID; 0x100];
    // `for` loops are not permitted in `const fn`, hence the manual counter.
    let mut i = 0;
    while i < ALPHABET.len() {
        index[ALPHABET[i] as usize] = i as u8;
        i += 1;
    }
    index
}

/// Reverse lookup table: indexed by a byte value, yields the 5-bit value of
/// that Base32 symbol, or `INVALID` when the byte is not in `ALPHABET`.
static ALPHABET_INDEX: [u8; 0x100] = build_alphabet_index();
46
47/// Encodes byte array into Base32 string.
48///
49/// The input is arbitrary `[u8]` slice
50/// and the output is lowercase `String`.
51/// Using lowercase RFC4648 alphabet and can be used for Onion addresses.
52/// Padding is not supported.
53///
54/// # Examples
55///
56/// ```rust
57/// use koibumi_base32 as base32;
58///
59/// let test = base32::encode(b"hello");
60/// let expected = "nbswy3dp";
61/// assert_eq!(test, expected);
62/// ```
63pub fn encode(bytes: impl AsRef<[u8]>) -> String {
64    let bytes = bytes.as_ref();
65
66    let mut encoded = Vec::new();
67
68    let mut i = 0;
69    let mut bit = 0;
70    while i < bytes.len() {
71        // 0      1      2     3      4      5     6      7
72        // |xxxxx xxx|xx xxxxx x|xxxx xxxx|x xxxxx xx|xxx xxxxx|
73        // 0         1          2         3          4
74        let c = if bit <= 3 {
75            bytes[i] >> (3 - bit)
76        } else if i + 1 < bytes.len() {
77            bytes[i] << (bit - 3) | bytes[i + 1] >> (11 - bit)
78        } else {
79            bytes[i] << (bit - 3)
80        } & 0x1f;
81
82        encoded.push(ALPHABET[c as usize]);
83
84        bit += 5;
85        if bit >= 8 {
86            i += 1;
87            bit -= 8;
88        }
89    }
90
91    String::from_utf8(encoded).unwrap()
92}
93
#[test]
fn test_encode() {
    // RFC 4648 test vectors, lowercased and with the padding stripped.
    let vectors: [(&[u8], &str); 7] = [
        (b"", ""),
        (b"f", "my"),
        (b"fo", "mzxq"),
        (b"foo", "mzxw6"),
        (b"foob", "mzxw6yq"),
        (b"fooba", "mzxw6ytb"),
        (b"foobar", "mzxw6ytboi"),
    ];
    for &(input, expected) in &vectors {
        assert_eq!(encode(input), expected);
    }
}
104
/// Error reporting that a character outside the Base32 alphabet was encountered.
///
/// The [`decode`] function returns this type when it fails.
///
/// [`decode`]: fn.decode.html
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InvalidCharacter(char);

impl InvalidCharacter {
    /// Returns the character that was rejected.
    pub fn char(&self) -> char {
        let InvalidCharacter(found) = *self;
        found
    }
}
119
120impl fmt::Display for InvalidCharacter {
121    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
122        let ch = self.0;
123        let code = u32::from(ch);
124        if ch.is_control() {
125            write!(f, "invalid character ({:#08x}) found", code)
126        } else {
127            write!(f, "invalid character '{}' ({:#08x}) found", ch, code)
128        }
129    }
130}
131
132impl std::error::Error for InvalidCharacter {}
133
134fn to_num(ch: char) -> Result<u8, InvalidCharacter> {
135    let i = ch as usize;
136    if i > 0xff {
137        return Err(InvalidCharacter(ch));
138    }
139    let v = ALPHABET_INDEX[i];
140    if v == INVALID {
141        Err(InvalidCharacter(ch))
142    } else {
143        Ok(v)
144    }
145}
146
147/// Decodes Base32 string into byte array.
148///
149/// The input is Base32 encoded lowercase `str` reference
150/// and the output is arbitrary `Vec<u8>`.
151/// Using lowercase RFC4648 alphabet and can be used for Onion addresses.
152/// Padding is not supported.
153///
154/// # Examples
155///
156/// ```rust
157/// use koibumi_base32 as base32;
158///
159/// let test = base32::decode("nbswy3dp")?;
160/// let expected = b"hello";
161/// assert_eq!(test, expected);
162/// # Ok::<(), Box<dyn std::error::Error>>(())
163/// ```
164pub fn decode(s: impl AsRef<str>) -> Result<Vec<u8>, InvalidCharacter> {
165    let s: Vec<char> = s.as_ref().chars().collect();
166
167    let mut decoded = Vec::new();
168
169    let mut i = 0;
170    let mut bit = 0;
171    let mut byte = 0;
172    while i < s.len() {
173        // 0         1          2         3          4
174        // |xxxxx xxx|xx xxxxx x|xxxx xxxx|x xxxxx xx|xxx xxxxx|
175        // 0      1      2     3      4      5     6      7
176        if bit <= 3 {
177            byte = byte << 5 | to_num(s[i])?;
178            if bit == 3 {
179                decoded.push(byte);
180            }
181        } else {
182            let n = to_num(s[i])?;
183            byte = byte << (8 - bit) | n >> (bit - 3);
184            decoded.push(byte);
185            byte = n;
186        }
187
188        bit += 5;
189        if bit >= 8 {
190            bit -= 8;
191        }
192        i += 1;
193    }
194
195    Ok(decoded)
196}
197
#[test]
fn test_decode() {
    // RFC 4648 test vectors, lowercased and with the padding stripped.
    let vectors: [(&str, &[u8]); 7] = [
        ("", b""),
        ("my", b"f"),
        ("mzxq", b"fo"),
        ("mzxw6", b"foo"),
        ("mzxw6yq", b"foob"),
        ("mzxw6ytb", b"fooba"),
        ("mzxw6ytboi", b"foobar"),
    ];
    for &(input, expected) in &vectors {
        assert_eq!(decode(input).unwrap(), expected);
    }
}