pem_rfc7468/decoder.rs
1//! Decoder for PEM encapsulated data.
2//!
3//! From RFC 7468 Section 2:
4//!
5//! > Textual encoding begins with a line comprising "-----BEGIN ", a
6//! > label, and "-----", and ends with a line comprising "-----END ", a
7//! > label, and "-----". Between these lines, or "encapsulation
8//! > boundaries", are base64-encoded data according to Section 4 of
9//! > [RFC 4648].
10//!
11//! [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648
12
13use crate::{
14 grammar, Base64Decoder, Error, Result, BASE64_WRAP_WIDTH, POST_ENCAPSULATION_BOUNDARY,
15 PRE_ENCAPSULATION_BOUNDARY,
16};
17use core::str;
18
19#[cfg(feature = "alloc")]
20use alloc::vec::Vec;
21
22#[cfg(feature = "std")]
23use std::io;
24
25/// Decode a PEM document according to RFC 7468's "Strict" grammar.
26///
27/// On success, writes the decoded document into the provided buffer, returning
28/// the decoded label and the portion of the provided buffer containing the
29/// decoded message.
30pub fn decode<'i, 'o>(pem: &'i [u8], buf: &'o mut [u8]) -> Result<(&'i str, &'o [u8])> {
31 let mut decoder = Decoder::new(pem).map_err(|e| check_for_headers(pem, e))?;
32 let type_label = decoder.type_label();
33 let buf = buf
34 .get_mut(..decoder.remaining_len())
35 .ok_or(Error::Length)?;
36 let decoded = decoder.decode(buf).map_err(|e| check_for_headers(pem, e))?;
37
38 if decoder.base64.is_finished() {
39 Ok((type_label, decoded))
40 } else {
41 Err(Error::Length)
42 }
43}
44
/// Decode a single PEM document using the "Strict" grammar of RFC 7468 into a
/// freshly allocated [`Vec`].
///
/// # Returns
///
/// The type label found in the encapsulation boundaries, paired with the
/// decoded message.
///
/// # Errors
///
/// Returns [`Error::HeaderDisallowed`] in place of an invalid-encoding error
/// when the input contains a colon (see `check_for_headers`); every other
/// parsing or decoding error is passed through unchanged.
#[cfg(feature = "alloc")]
pub fn decode_vec(pem: &[u8]) -> Result<(&str, Vec<u8>)> {
    // Both fallible steps report disallowed headers the same way.
    let with_header_check = |err: Error| check_for_headers(pem, err);

    let mut decoder = Decoder::new(pem).map_err(with_header_check)?;

    let mut message = Vec::new();
    decoder
        .decode_to_end(&mut message)
        .map_err(with_header_check)?;

    Ok((decoder.type_label(), message))
}
57
58/// Decode the encapsulation boundaries of a PEM document according to RFC 7468's "Strict" grammar.
59///
60/// On success, returning the decoded label.
61pub fn decode_label(pem: &[u8]) -> Result<&str> {
62 Ok(Encapsulation::try_from(pem)?.label())
63}
64
65/// Buffered PEM decoder.
66///
67/// Stateful buffered decoder type which decodes an input PEM document according
68/// to RFC 7468's "Strict" grammar.
69#[derive(Clone)]
70pub struct Decoder<'i> {
71 /// PEM type label.
72 type_label: &'i str,
73
74 /// Buffered Base64 decoder.
75 base64: Base64Decoder<'i>,
76}
77
78impl<'i> Decoder<'i> {
79 /// Create a new PEM [`Decoder`] with the default options.
80 ///
81 /// Uses the default 64-character line wrapping.
82 pub fn new(pem: &'i [u8]) -> Result<Self> {
83 Self::new_wrapped(pem, BASE64_WRAP_WIDTH)
84 }
85
86 /// Create a new PEM [`Decoder`] which wraps at the given line width.
87 pub fn new_wrapped(pem: &'i [u8], line_width: usize) -> Result<Self> {
88 let encapsulation = Encapsulation::try_from(pem)?;
89 let type_label = encapsulation.label();
90 let base64 = Base64Decoder::new_wrapped(encapsulation.encapsulated_text, line_width)?;
91 Ok(Self { type_label, base64 })
92 }
93
94 /// Get the PEM type label for the input document.
95 pub fn type_label(&self) -> &'i str {
96 self.type_label
97 }
98
99 /// Decode data into the provided output buffer.
100 ///
101 /// There must be at least as much remaining Base64 input to be decoded
102 /// in order to completely fill `buf`.
103 pub fn decode<'o>(&mut self, buf: &'o mut [u8]) -> Result<&'o [u8]> {
104 Ok(self.base64.decode(buf)?)
105 }
106
107 /// Decode all of the remaining data in the input buffer into `buf`.
108 #[cfg(feature = "alloc")]
109 pub fn decode_to_end<'o>(&mut self, buf: &'o mut Vec<u8>) -> Result<&'o [u8]> {
110 Ok(self.base64.decode_to_end(buf)?)
111 }
112
113 /// Get the decoded length of the remaining PEM data after Base64 decoding.
114 pub fn remaining_len(&self) -> usize {
115 self.base64.remaining_len()
116 }
117
118 /// Are we finished decoding the PEM input?
119 pub fn is_finished(&self) -> bool {
120 self.base64.is_finished()
121 }
122}
123
124impl<'i> From<Decoder<'i>> for Base64Decoder<'i> {
125 fn from(decoder: Decoder<'i>) -> Base64Decoder<'i> {
126 decoder.base64
127 }
128}
129
/// Lets a PEM [`Decoder`] be used as a byte source.
///
/// Every method forwards to the same-named method on the wrapped Base64
/// decoder.
#[cfg(feature = "std")]
impl<'i> io::Read for Decoder<'i> {
    /// Forward to the inner decoder's `read`.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        self.base64.read(buf)
    }

    /// Forward to the inner decoder's `read_to_end`.
    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
        self.base64.read_to_end(buf)
    }

    /// Forward to the inner decoder's `read_exact`.
    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
        self.base64.read_exact(buf)
    }
}
144
/// Parser for the PEM encapsulation boundaries.
///
/// Makes a first pass over the input to find the pre-encapsulation
/// (`-----BEGIN [...]-----`) and post-encapsulation (`-----END [...]-----`)
/// boundaries, while attempting to avoid branching on the potentially secret
/// Base64-encoded data that sits between them.
///
/// Only a single encapsulated message is supported at present. Future work
/// could extend this type to provide an iterator over a series of
/// encapsulated messages.
#[derive(Clone, Copy, Debug)]
struct Encapsulation<'a> {
    /// Type label taken from the pre/post-encapsulation boundaries.
    ///
    /// From RFC 7468 Section 2:
    ///
    /// > The type of data encoded is labeled depending on the type label in
    /// > the "-----BEGIN " line (pre-encapsulation boundary). For example,
    /// > the line may be "-----BEGIN CERTIFICATE-----" to indicate that the
    /// > content is a PKIX certificate (see further below). Generators MUST
    /// > put the same label on the "-----END " line (post-encapsulation
    /// > boundary) as the corresponding "-----BEGIN " line. Labels are
    /// > formally case-sensitive, uppercase, and comprised of zero or more
    /// > characters; they do not contain consecutive spaces or hyphen-minuses,
    /// > nor do they contain spaces or hyphen-minuses at either end. Parsers
    /// > MAY disregard the label in the post-encapsulation boundary instead of
    /// > signaling an error if there is a label mismatch: some extant
    /// > implementations require the labels to match; others do not.
    label: &'a str,

    /// Text found between the two boundaries.
    ///
    /// This is expected to be Base64, but it is deliberately left unvalidated
    /// by this type so that it can be handled in constant-time.
    encapsulated_text: &'a [u8],
}
182
183impl<'a> Encapsulation<'a> {
184 /// Parse the type label and encapsulated text from between the
185 /// pre/post-encapsulation boundaries.
186 pub fn parse(data: &'a [u8]) -> Result<Self> {
187 // Strip the "preamble": optional text occurring before the pre-encapsulation boundary
188 let data = grammar::strip_preamble(data)?;
189
190 // Parse pre-encapsulation boundary (including label)
191 let data = data
192 .strip_prefix(PRE_ENCAPSULATION_BOUNDARY)
193 .ok_or(Error::PreEncapsulationBoundary)?;
194
195 let (label, body) = grammar::split_label(data).ok_or(Error::Label)?;
196
197 let mut body = match grammar::strip_trailing_eol(body).unwrap_or(body) {
198 [head @ .., b'-', b'-', b'-', b'-', b'-'] => head,
199 _ => return Err(Error::PreEncapsulationBoundary),
200 };
201
202 // Ensure body ends with a properly labeled post-encapsulation boundary
203 for &slice in [POST_ENCAPSULATION_BOUNDARY, label.as_bytes()].iter().rev() {
204 // Ensure the input ends with the post encapsulation boundary as
205 // well as a matching label
206 if !body.ends_with(slice) {
207 return Err(Error::PostEncapsulationBoundary);
208 }
209
210 let len = body.len().checked_sub(slice.len()).ok_or(Error::Length)?;
211 body = body.get(..len).ok_or(Error::PostEncapsulationBoundary)?;
212 }
213
214 let encapsulated_text =
215 grammar::strip_trailing_eol(body).ok_or(Error::PostEncapsulationBoundary)?;
216
217 Ok(Self {
218 label,
219 encapsulated_text,
220 })
221 }
222
223 /// Get the label parsed from the encapsulation boundaries.
224 pub fn label(self) -> &'a str {
225 self.label
226 }
227}
228
229impl<'a> TryFrom<&'a [u8]> for Encapsulation<'a> {
230 type Error = Error;
231
232 fn try_from(bytes: &'a [u8]) -> Result<Self> {
233 Self::parse(bytes)
234 }
235}
236
237/// Check for PEM headers in the input, as they are disallowed by RFC7468.
238///
239/// Returns `Error::HeaderDisallowed` if headers are encountered.
240fn check_for_headers(pem: &[u8], err: Error) -> Error {
241 if err == Error::Base64(base64ct::Error::InvalidEncoding)
242 && pem.iter().any(|&b| b == grammar::CHAR_COLON)
243 {
244 Error::HeaderDisallowed
245 } else {
246 err
247 }
248}
249
#[cfg(test)]
mod tests {
    use super::Encapsulation;

    /// Base64 text expected between the boundaries of the PKCS#8 example file.
    const PKCS8_ENCAPSULATED_TEXT: &[u8; 64] =
        b"MC4CAQAwBQYDK2VwBCIEIBftnHPp22SewYmmEoMcX8VwI4IHwaqd+9LFPj/15eqF";

    /// The example document yields the expected label and the raw
    /// (still Base64-encoded) encapsulated text.
    #[test]
    fn pkcs8_example() {
        let pem = include_bytes!("../tests/examples/pkcs8.pem");
        let Encapsulation {
            label,
            encapsulated_text,
        } = Encapsulation::parse(pem).unwrap();

        assert_eq!(label, "PRIVATE KEY");
        assert_eq!(encapsulated_text, PKCS8_ENCAPSULATED_TEXT);
    }
}