rustc_serialize/
base64.rs

1// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10//
11// ignore-lexer-test FIXME #15679
12
13//! Base64 binary-to-text encoding
14
15pub use self::FromBase64Error::*;
16pub use self::CharacterSet::*;
17
18use std::fmt;
19use std::error;
20
/// Available encoding character sets
///
/// The two sets differ only in the characters used for the values 62 and 63.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CharacterSet {
    /// The standard character set (uses `+` and `/`)
    Standard,
    /// The URL safe character set (uses `-` and `_`)
    UrlSafe
}
29
/// Available newline types
///
/// Selects the line separator emitted when encoded output is wrapped.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Newline {
    /// A linefeed (i.e. Unix-style newline)
    LF,
    /// A carriage return and a linefeed (i.e. Windows-style newline)
    CRLF
}
38
39/// Contains configuration parameters for `to_base64`.
40#[derive(Clone, Copy, Debug)]
41pub struct Config {
42    /// Character set to use
43    pub char_set: CharacterSet,
44    /// Newline to use
45    pub newline: Newline,
46    /// True to pad output with `=` characters
47    pub pad: bool,
48    /// `Some(len)` to wrap lines at `len`, `None` to disable line wrapping
49    pub line_length: Option<usize>
50}
51
52/// Configuration for RFC 4648 standard base64 encoding
53pub static STANDARD: Config =
54    Config {char_set: Standard, newline: Newline::CRLF, pad: true, line_length: None};
55
56/// Configuration for RFC 4648 base64url encoding
57pub static URL_SAFE: Config =
58    Config {char_set: UrlSafe, newline: Newline::CRLF, pad: false, line_length: None};
59
60/// Configuration for RFC 2045 MIME base64 encoding
61pub static MIME: Config =
62    Config {char_set: Standard, newline: Newline::CRLF, pad: true, line_length: Some(76)};
63
// Encoding alphabet for `CharacterSet::Standard`, indexed by 6-bit value.
static STANDARD_CHARS: &'static [u8] =
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Encoding alphabet for `CharacterSet::UrlSafe`; differs from the standard
// alphabet only in the last two characters.
static URLSAFE_CHARS: &'static [u8] =
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
71
72/// A trait for converting a value to base64 encoding.
73pub trait ToBase64 {
74    /// Converts the value of `self` to a base64 value following the specified
75    /// format configuration, returning the owned string.
76    fn to_base64(&self, config: Config) -> String;
77}
78
79impl ToBase64 for [u8] {
80    /// Turn a vector of `u8` bytes into a base64 string.
81    ///
82    /// # Example
83    ///
84    /// ```rust
85    /// extern crate rustc_serialize;
86    /// use rustc_serialize::base64::{ToBase64, STANDARD};
87    ///
88    /// fn main () {
89    ///     let str = [52,32].to_base64(STANDARD);
90    ///     println!("base 64 output: {:?}", str);
91    /// }
92    /// ```
93    fn to_base64(&self, config: Config) -> String {
94        let bytes = match config.char_set {
95            Standard => STANDARD_CHARS,
96            UrlSafe => URLSAFE_CHARS
97        };
98
99        let len = self.len();
100        let newline = match config.newline {
101            Newline::LF => "\n",
102            Newline::CRLF => "\r\n",
103        };
104
105        // Preallocate memory.
106        let mut prealloc_len = (len + 2) / 3 * 4;
107        if let Some(line_length) = config.line_length {
108            let num_lines = match prealloc_len {
109                0 => 0,
110                n => (n - 1) / line_length
111            };
112            prealloc_len += num_lines * newline.bytes().count();
113        }
114
115        let mut out_bytes = vec![b'='; prealloc_len];
116
117        // Deal with padding bytes
118        let mod_len = len % 3;
119
120        // Use iterators to reduce branching
121        {
122            let mut cur_length = 0;
123
124            let mut s_in = self[..len - mod_len].iter().map(|&x| x as u32);
125            let mut s_out = out_bytes.iter_mut();
126
127            // Convenient shorthand
128            let enc = |val| bytes[val as usize];
129            let mut write = |val| *s_out.next().unwrap() = val;
130
131            // Iterate though blocks of 4
132            while let (Some(first), Some(second), Some(third)) =
133                        (s_in.next(), s_in.next(), s_in.next()) {
134
135                // Line break if needed
136                if let Some(line_length) = config.line_length {
137                    if cur_length >= line_length {
138                        for b in newline.bytes() { write(b) };
139                        cur_length = 0;
140                    }
141                }
142
143                let n = first << 16 | second << 8 | third;
144
145                // This 24-bit number gets separated into four 6-bit numbers.
146                write(enc((n >> 18) & 63));
147                write(enc((n >> 12) & 63));
148                write(enc((n >> 6 ) & 63));
149                write(enc((n >> 0 ) & 63));
150
151                cur_length += 4;
152            }
153
154            // Line break only needed if padding is required
155            if mod_len != 0 {
156                if let Some(line_length) = config.line_length {
157                    if cur_length >= line_length {
158                        for b in newline.bytes() { write(b) };
159                    }
160                }
161            }
162
163            // Heh, would be cool if we knew this was exhaustive
164            // (the dream of bounded integer types)
165            match mod_len {
166                0 => (),
167                1 => {
168                    let n = (self[len-1] as u32) << 16;
169                    write(enc((n >> 18) & 63));
170                    write(enc((n >> 12) & 63));
171                }
172                2 => {
173                    let n = (self[len-2] as u32) << 16 |
174                            (self[len-1] as u32) << 8;
175                    write(enc((n >> 18) & 63));
176                    write(enc((n >> 12) & 63));
177                    write(enc((n >> 6 ) & 63));
178                }
179                _ => panic!("Algebra is broken, please alert the math police")
180            }
181        }
182
183        // We get padding for "free", so only have to drop it if unwanted.
184        if !config.pad {
185            while let Some(&b'=') = out_bytes.last() {
186                out_bytes.pop();
187            }
188        }
189
190        unsafe { String::from_utf8_unchecked(out_bytes) }
191    }
192}
193
194impl<'a, T: ?Sized + ToBase64> ToBase64 for &'a T {
195    fn to_base64(&self, config: Config) -> String {
196        (**self).to_base64(config)
197    }
198}
199
200/// A trait for converting from base64 encoded values.
201pub trait FromBase64 {
202    /// Converts the value of `self`, interpreted as base64 encoded data, into
203    /// an owned vector of bytes, returning the vector.
204    fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error>;
205}
206
/// Errors that can occur when decoding a base64 encoded string
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum FromBase64Error {
    /// The input contained a character not part of the base64 format;
    /// holds the offending byte and its offset in the input
    InvalidBase64Byte(u8, usize),
    /// The input had an invalid length
    InvalidBase64Length,
}
215
216impl fmt::Debug for FromBase64Error {
217    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
218        match *self {
219            InvalidBase64Byte(ch, idx) =>
220                write!(f, "Invalid character '{}' at position {}", ch, idx),
221            InvalidBase64Length => write!(f, "Invalid length"),
222        }
223    }
224}
225
226impl error::Error for FromBase64Error {
227    fn description(&self) -> &str {
228        match *self {
229            InvalidBase64Byte(_, _) => "invalid character",
230            InvalidBase64Length => "invalid length",
231        }
232    }
233}
234
235impl fmt::Display for FromBase64Error {
236    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
237        fmt::Debug::fmt(&self, f)
238    }
239}
240
241impl FromBase64 for str {
242    /// Convert any base64 encoded string (literal, `@`, `&`, or `~`)
243    /// to the byte values it encodes.
244    ///
245    /// You can use the `String::from_utf8` function to turn a `Vec<u8>` into a
246    /// string with characters corresponding to those values.
247    ///
248    /// # Example
249    ///
250    /// This converts a string literal to base64 and back.
251    ///
252    /// ```rust
253    /// extern crate rustc_serialize;
254    /// use rustc_serialize::base64::{ToBase64, FromBase64, STANDARD};
255    ///
256    /// fn main () {
257    ///     let hello_str = b"Hello, World".to_base64(STANDARD);
258    ///     println!("base64 output: {}", hello_str);
259    ///     let res = hello_str.from_base64();
260    ///     if res.is_ok() {
261    ///       let opt_bytes = String::from_utf8(res.unwrap());
262    ///       if opt_bytes.is_ok() {
263    ///         println!("decoded from base64: {:?}", opt_bytes.unwrap());
264    ///       }
265    ///     }
266    /// }
267    /// ```
268    #[inline]
269    fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error> {
270        self.as_bytes().from_base64()
271    }
272}
273
274impl FromBase64 for [u8] {
275    fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error> {
276        let mut r = Vec::with_capacity(self.len());
277        let mut buf: u32 = 0;
278        let mut modulus = 0;
279
280        let mut it = self.iter();
281        for byte in it.by_ref() {
282            let code = DECODE_TABLE[*byte as usize];
283            if code >= SPECIAL_CODES_START {
284                match code {
285                    NEWLINE_CODE => continue,
286                    EQUALS_CODE => break,
287                    INVALID_CODE => return Err(InvalidBase64Byte(
288                            *byte, (byte as *const _ as usize) - self.as_ptr() as usize)),
289                    _ => unreachable!(),
290                }
291            }
292            buf = (buf | code as u32) << 6;
293            modulus += 1;
294            if modulus == 4 {
295                modulus = 0;
296                r.push((buf >> 22) as u8);
297                r.push((buf >> 14) as u8);
298                r.push((buf >> 6 ) as u8);
299            }
300        }
301
302        for byte in it {
303            match *byte {
304                b'=' | b'\r' | b'\n' => continue,
305                _ => return Err(InvalidBase64Byte(
306                        *byte, (byte as *const _ as usize) - self.as_ptr() as usize)),
307            }
308        }
309
310        match modulus {
311            2 => {
312                r.push((buf >> 10) as u8);
313            }
314            3 => {
315                r.push((buf >> 16) as u8);
316                r.push((buf >> 8 ) as u8);
317            }
318            0 => (),
319            _ => return Err(InvalidBase64Length),
320        }
321
322        Ok(r)
323    }
324}
325
326impl<'a, T: ?Sized + FromBase64> FromBase64 for &'a T {
327    fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error> {
328        (**self).from_base64()
329    }
330}
331
/// Table entry for a byte that is not part of any base64 alphabet.
const INVALID_CODE: u8 = 0xFF;
/// Table entry for the padding character `=`.
const EQUALS_CODE: u8 = 0xFE;
/// Table entry for `\r` and `\n`.
const NEWLINE_CODE: u8 = 0xFD;
/// Smallest special entry; every entry below it is a decoded 6-bit value.
const SPECIAL_CODES_START: u8 = NEWLINE_CODE;

/// Base64 decoding lookup table, indexed by input byte.
///
/// Alphabet characters of both character sets map to their 6-bit value;
/// all other bytes map to one of the special codes above. The table can be
/// regenerated with:
///
/// ```
/// for ch in 0..256u32 {
///     let ch = ch as u8;
///     let code = match ch {
///         b'A'...b'Z' => ch - 0x41,
///         b'a'...b'z' => ch - 0x47,
///         b'0'...b'9' => ch + 0x04,
///         b'+' | b'-' => 0x3E,
///         b'/' | b'_' => 0x3F,
///         b'=' => 0xFE,
///         b'\r' | b'\n' => 0xFD,
///         _ => 0xFF,
///     };
///     print!("0x{:02X}, ", code);
///     if ch % 16 == 15 { println!(""); }
/// }
/// println!("");
/// ```
const DECODE_TABLE: [u8; 256] = [
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFD, 0xFF, 0xFF, 0xFD, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3E, 0xFF, 0x3E, 0xFF, 0x3F,
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF,
    0xFF, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
    0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F,
    0xFF, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
];
377
#[cfg(test)]
mod tests {
    use base64::{Config, Newline, FromBase64, ToBase64, STANDARD, URL_SAFE};

    // Pairs of plain text and its padded, standard-alphabet encoding.
    const PLAIN_AND_ENCODED: [(&'static str, &'static str); 7] = [
        ("", ""),
        ("f", "Zg=="),
        ("fo", "Zm8="),
        ("foo", "Zm9v"),
        ("foob", "Zm9vYg=="),
        ("fooba", "Zm9vYmE="),
        ("foobar", "Zm9vYmFy"),
    ];

    #[test]
    fn test_to_base64_basic() {
        for &(plain, encoded) in PLAIN_AND_ENCODED.iter() {
            assert_eq!(plain.as_bytes().to_base64(STANDARD), encoded);
        }
    }

    #[test]
    fn test_to_base64_crlf_line_break() {
        let unwrapped = Config { line_length: None, ..STANDARD };
        assert!(![0; 1000].to_base64(unwrapped).contains("\r\n"));

        let wrapped = Config { line_length: Some(4), ..STANDARD };
        assert_eq!(b"foobar".to_base64(wrapped), "Zm9v\r\nYmFy");
    }

    #[test]
    fn test_to_base64_lf_line_break() {
        let unwrapped = Config { line_length: None, newline: Newline::LF, ..STANDARD };
        assert!(![0; 1000].to_base64(unwrapped).contains("\n"));

        let wrapped = Config { line_length: Some(4), newline: Newline::LF, ..STANDARD };
        assert_eq!(b"foobar".to_base64(wrapped), "Zm9v\nYmFy");
    }

    #[test]
    fn test_to_base64_padding() {
        let unpadded = Config { pad: false, ..STANDARD };
        assert_eq!("f".as_bytes().to_base64(unpadded), "Zg");
        assert_eq!("fo".as_bytes().to_base64(unpadded), "Zm8");
    }

    #[test]
    fn test_to_base64_url_safe() {
        assert_eq!([251, 255].to_base64(URL_SAFE), "-_8");
        assert_eq!([251, 255].to_base64(STANDARD), "+/8=");
    }

    #[test]
    fn test_to_base64_empty_line_length() {
        // Only checks that encoding empty input with wrapping enabled does not panic.
        let _ = [].to_base64(Config { line_length: Some(72), ..STANDARD });
    }

    #[test]
    fn test_from_base64_basic() {
        for &(plain, encoded) in PLAIN_AND_ENCODED.iter() {
            assert_eq!(encoded.from_base64().unwrap(), plain.as_bytes());
        }
    }

    #[test]
    fn test_from_base64_bytes() {
        assert_eq!(b"Zm9vYmFy".from_base64().unwrap(), b"foobar");
    }

    #[test]
    fn test_from_base64_newlines() {
        let cases = [
            ("Zm9v\r\nYmFy", "foobar"),
            ("Zm9vYg==\r\n", "foob"),
            ("Zm9v\nYmFy", "foobar"),
            ("Zm9vYg==\n", "foob"),
        ];
        for &(encoded, plain) in cases.iter() {
            assert_eq!(encoded.from_base64().unwrap(), plain.as_bytes());
        }
    }

    #[test]
    fn test_from_base64_urlsafe() {
        let url_safe = "-_8".from_base64().unwrap();
        let standard = "+/8=".from_base64().unwrap();
        assert_eq!(url_safe, standard);
    }

    #[test]
    fn test_from_base64_invalid_char() {
        for &input in ["Zm$=", "Zg==$"].iter() {
            assert!(input.from_base64().is_err());
        }
    }

    #[test]
    fn test_from_base64_invalid_padding() {
        assert!("Z===".from_base64().is_err());
    }

    #[test]
    fn test_base64_random() {
        use rand::{thread_rng, Rng};

        let mut rng = thread_rng();
        for _ in 0..1000 {
            let times = rng.gen_range(1, 100);
            let v = rng.gen_iter::<u8>().take(times).collect::<Vec<_>>();
            let round_trip = v.to_base64(STANDARD).from_base64().unwrap();
            assert_eq!(round_trip, v);
        }
    }
}