pem_rfc7468/
decoder.rs

1//! Decoder for PEM encapsulated data.
2//!
3//! From RFC 7468 Section 2:
4//!
5//! > Textual encoding begins with a line comprising "-----BEGIN ", a
6//! > label, and "-----", and ends with a line comprising "-----END ", a
7//! > label, and "-----".  Between these lines, or "encapsulation
8//! > boundaries", are base64-encoded data according to Section 4 of
9//! > [RFC 4648].
10//!
11//! [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648
12
13use crate::{
14    grammar, Base64Decoder, Error, Result, BASE64_WRAP_WIDTH, POST_ENCAPSULATION_BOUNDARY,
15    PRE_ENCAPSULATION_BOUNDARY,
16};
17use core::str;
18
19#[cfg(feature = "alloc")]
20use alloc::vec::Vec;
21
22#[cfg(feature = "std")]
23use std::io;
24
25/// Decode a PEM document according to RFC 7468's "Strict" grammar.
26///
27/// On success, writes the decoded document into the provided buffer, returning
28/// the decoded label and the portion of the provided buffer containing the
29/// decoded message.
30pub fn decode<'i, 'o>(pem: &'i [u8], buf: &'o mut [u8]) -> Result<(&'i str, &'o [u8])> {
31    let mut decoder = Decoder::new(pem).map_err(|e| check_for_headers(pem, e))?;
32    let type_label = decoder.type_label();
33    let buf = buf
34        .get_mut(..decoder.remaining_len())
35        .ok_or(Error::Length)?;
36    let decoded = decoder.decode(buf).map_err(|e| check_for_headers(pem, e))?;
37
38    if decoder.base64.is_finished() {
39        Ok((type_label, decoded))
40    } else {
41        Err(Error::Length)
42    }
43}
44
/// Decode a single PEM document using the "Strict" grammar of RFC 7468,
/// collecting the decoded message into a freshly allocated [`Vec`].
///
/// On success, returns the type label (borrowed from `pem`) and the decoded
/// bytes.
///
/// # Errors
///
/// Returns [`Error::HeaderDisallowed`] in place of a Base64 "invalid encoding"
/// error when the input contains a colon (see `check_for_headers`); any other
/// parsing or decoding error is returned unchanged.
#[cfg(feature = "alloc")]
pub fn decode_vec(pem: &[u8]) -> Result<(&str, Vec<u8>)> {
    // Both fallible steps share the same header-detection error mapping.
    let remap = |e| check_for_headers(pem, e);

    let mut decoder = Decoder::new(pem).map_err(remap)?;
    let type_label = decoder.type_label();

    let mut decoded = Vec::new();
    decoder.decode_to_end(&mut decoded).map_err(remap)?;

    Ok((type_label, decoded))
}
57
58/// Decode the encapsulation boundaries of a PEM document according to RFC 7468's "Strict" grammar.
59///
60/// On success, returning the decoded label.
61pub fn decode_label(pem: &[u8]) -> Result<&str> {
62    Ok(Encapsulation::try_from(pem)?.label())
63}
64
65/// Buffered PEM decoder.
66///
67/// Stateful buffered decoder type which decodes an input PEM document according
68/// to RFC 7468's "Strict" grammar.
69#[derive(Clone)]
70pub struct Decoder<'i> {
71    /// PEM type label.
72    type_label: &'i str,
73
74    /// Buffered Base64 decoder.
75    base64: Base64Decoder<'i>,
76}
77
78impl<'i> Decoder<'i> {
79    /// Create a new PEM [`Decoder`] with the default options.
80    ///
81    /// Uses the default 64-character line wrapping.
82    pub fn new(pem: &'i [u8]) -> Result<Self> {
83        Self::new_wrapped(pem, BASE64_WRAP_WIDTH)
84    }
85
86    /// Create a new PEM [`Decoder`] which wraps at the given line width.
87    pub fn new_wrapped(pem: &'i [u8], line_width: usize) -> Result<Self> {
88        let encapsulation = Encapsulation::try_from(pem)?;
89        let type_label = encapsulation.label();
90        let base64 = Base64Decoder::new_wrapped(encapsulation.encapsulated_text, line_width)?;
91        Ok(Self { type_label, base64 })
92    }
93
94    /// Get the PEM type label for the input document.
95    pub fn type_label(&self) -> &'i str {
96        self.type_label
97    }
98
99    /// Decode data into the provided output buffer.
100    ///
101    /// There must be at least as much remaining Base64 input to be decoded
102    /// in order to completely fill `buf`.
103    pub fn decode<'o>(&mut self, buf: &'o mut [u8]) -> Result<&'o [u8]> {
104        Ok(self.base64.decode(buf)?)
105    }
106
107    /// Decode all of the remaining data in the input buffer into `buf`.
108    #[cfg(feature = "alloc")]
109    pub fn decode_to_end<'o>(&mut self, buf: &'o mut Vec<u8>) -> Result<&'o [u8]> {
110        Ok(self.base64.decode_to_end(buf)?)
111    }
112
113    /// Get the decoded length of the remaining PEM data after Base64 decoding.
114    pub fn remaining_len(&self) -> usize {
115        self.base64.remaining_len()
116    }
117
118    /// Are we finished decoding the PEM input?
119    pub fn is_finished(&self) -> bool {
120        self.base64.is_finished()
121    }
122}
123
124impl<'i> From<Decoder<'i>> for Base64Decoder<'i> {
125    fn from(decoder: Decoder<'i>) -> Base64Decoder<'i> {
126        decoder.base64
127    }
128}
129
/// [`io::Read`] support: every method forwards to the inner Base64 decoder.
#[cfg(feature = "std")]
impl<'i> io::Read for Decoder<'i> {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        io::Read::read(&mut self.base64, buf)
    }

    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
        io::Read::read_to_end(&mut self.base64, buf)
    }

    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
        io::Read::read_exact(&mut self.base64, buf)
    }
}
144
/// PEM encapsulation parser.
///
/// Makes a first pass over the input to locate the pre-encapsulation
/// (`-----BEGIN [...]-----`) and post-encapsulation (`-----END [...]-----`)
/// boundaries, while attempting to avoid branching on the potentially secret
/// Base64-encoded data that sits between them.
///
/// Only a single encapsulated message is supported at present. Future work
/// could extend this type to provide an iterator over a series of
/// encapsulated messages.
#[derive(Copy, Clone, Debug)]
struct Encapsulation<'a> {
    /// Type label extracted from the pre/post-encapsulation boundaries.
    ///
    /// From RFC 7468 Section 2:
    ///
    /// > The type of data encoded is labeled depending on the type label in
    /// > the "-----BEGIN " line (pre-encapsulation boundary).  For example,
    /// > the line may be "-----BEGIN CERTIFICATE-----" to indicate that the
    /// > content is a PKIX certificate (see further below).  Generators MUST
    /// > put the same label on the "-----END " line (post-encapsulation
    /// > boundary) as the corresponding "-----BEGIN " line.  Labels are
    /// > formally case-sensitive, uppercase, and comprised of zero or more
    /// > characters; they do not contain consecutive spaces or hyphen-minuses,
    /// > nor do they contain spaces or hyphen-minuses at either end.  Parsers
    /// > MAY disregard the label in the post-encapsulation boundary instead of
    /// > signaling an error if there is a label mismatch: some extant
    /// > implementations require the labels to match; others do not.
    label: &'a str,

    /// Text found between the two boundaries.
    ///
    /// This is expected to be Base64, but it is deliberately left unvalidated
    /// here so that it can be handled in constant time.
    encapsulated_text: &'a [u8],
}
182
183impl<'a> Encapsulation<'a> {
184    /// Parse the type label and encapsulated text from between the
185    /// pre/post-encapsulation boundaries.
186    pub fn parse(data: &'a [u8]) -> Result<Self> {
187        // Strip the "preamble": optional text occurring before the pre-encapsulation boundary
188        let data = grammar::strip_preamble(data)?;
189
190        // Parse pre-encapsulation boundary (including label)
191        let data = data
192            .strip_prefix(PRE_ENCAPSULATION_BOUNDARY)
193            .ok_or(Error::PreEncapsulationBoundary)?;
194
195        let (label, body) = grammar::split_label(data).ok_or(Error::Label)?;
196
197        let mut body = match grammar::strip_trailing_eol(body).unwrap_or(body) {
198            [head @ .., b'-', b'-', b'-', b'-', b'-'] => head,
199            _ => return Err(Error::PreEncapsulationBoundary),
200        };
201
202        // Ensure body ends with a properly labeled post-encapsulation boundary
203        for &slice in [POST_ENCAPSULATION_BOUNDARY, label.as_bytes()].iter().rev() {
204            // Ensure the input ends with the post encapsulation boundary as
205            // well as a matching label
206            if !body.ends_with(slice) {
207                return Err(Error::PostEncapsulationBoundary);
208            }
209
210            let len = body.len().checked_sub(slice.len()).ok_or(Error::Length)?;
211            body = body.get(..len).ok_or(Error::PostEncapsulationBoundary)?;
212        }
213
214        let encapsulated_text =
215            grammar::strip_trailing_eol(body).ok_or(Error::PostEncapsulationBoundary)?;
216
217        Ok(Self {
218            label,
219            encapsulated_text,
220        })
221    }
222
223    /// Get the label parsed from the encapsulation boundaries.
224    pub fn label(self) -> &'a str {
225        self.label
226    }
227}
228
229impl<'a> TryFrom<&'a [u8]> for Encapsulation<'a> {
230    type Error = Error;
231
232    fn try_from(bytes: &'a [u8]) -> Result<Self> {
233        Self::parse(bytes)
234    }
235}
236
237/// Check for PEM headers in the input, as they are disallowed by RFC7468.
238///
239/// Returns `Error::HeaderDisallowed` if headers are encountered.
240fn check_for_headers(pem: &[u8], err: Error) -> Error {
241    if err == Error::Base64(base64ct::Error::InvalidEncoding)
242        && pem.iter().any(|&b| b == grammar::CHAR_COLON)
243    {
244        Error::HeaderDisallowed
245    } else {
246        err
247    }
248}
249
#[cfg(test)]
mod tests {
    use super::Encapsulation;

    /// The PKCS#8 example must parse into its label and the raw (still
    /// Base64-encoded) text between the boundaries.
    #[test]
    fn pkcs8_example() {
        let pem = include_bytes!("../tests/examples/pkcs8.pem");
        let encapsulation = Encapsulation::parse(pem).unwrap();

        assert_eq!(encapsulation.label, "PRIVATE KEY");

        // Same 64 bytes as the numeric form, written as the Base64 text they spell.
        let expected_text: &[u8] =
            b"MC4CAQAwBQYDK2VwBCIEIBftnHPp22SewYmmEoMcX8VwI4IHwaqd+9LFPj/15eqF";
        assert_eq!(encapsulation.encapsulated_text, expected_text);
    }
}