auditable_info/
lib.rs

1#![forbid(unsafe_code)]
2
3//! High-level crate to extract the dependency trees embedded in binaries by [`cargo auditable`](https://crates.io/crates/cargo-auditable).
4//!
5//! Deserializes them to a JSON string or Rust data structures, at your option.
6//!
7//! ```rust, ignore
8//! // Uses the default limits: 1GiB input file size, 8MiB audit data size
9//! let info = audit_info_from_file(&PathBuf::from("path/to/file"), Default::default())?;
10//! ```
11//! Functions to load the data from a `Read` instance or from `&[u8]` are also provided.
12//!
13//! The supported formats are [ELF](https://en.wikipedia.org/wiki/Executable_and_Linkable_Format),
14//! [PE](https://en.wikipedia.org/wiki/Portable_Executable),
15//! [Mach-O](https://en.wikipedia.org/wiki/Mach-O) and [WebAssembly](https://en.wikipedia.org/wiki/WebAssembly).
16//!
17//! If you need an even lower-level interface than the one provided by this crate,
18//! use the [`auditable-extract`](http://docs.rs/auditable-extract/) and
19//! [`auditable-serde`](http://docs.rs/auditable-serde/) crates.
20
21use auditable_extract::raw_auditable_data;
22#[cfg(feature = "serde")]
23use auditable_serde::VersionInfo;
24use miniz_oxide::inflate::decompress_to_vec_zlib_with_limit;
25use std::fs::File;
26use std::io::{BufRead, BufReader, Read};
27use std::path::Path;
28
29mod error;
30
31pub use crate::error::*;
32
/// Loads audit info from the specified binary compiled with `cargo auditable`.
///
/// The entire file is loaded into memory. The RAM usage limit can be configured using the [`Limits`] struct.
///
/// ```rust, ignore
/// // Uses the default limits: 1GiB input file size, 8MiB audit data size
/// let info = audit_info_from_file(&PathBuf::from("path/to/file"), Default::default())?;
/// ```
///
/// The data is validated to only have a single root package and not contain any circular dependencies.
#[cfg(feature = "serde")]
pub fn audit_info_from_file(path: &Path, limits: Limits) -> Result<VersionInfo, Error> {
    // Extract the JSON string first, then deserialize it as a separate step.
    let json = json_from_file(path, limits)?;
    let info = serde_json::from_str(&json)?;
    Ok(info)
}
47
48/// Extracts the audit data from the specified binary and returns the JSON string.
49/// This is useful if you want to forward the data somewhere instead of parsing it to Rust data structures.
50///
51/// If you want to obtain the Zlib-compressed data instead,
52/// use the [`auditable-extract`](https://docs.rs/auditable-extract/) crate directly.
53pub fn json_from_file(path: &Path, limits: Limits) -> Result<String, Error> {
54    let file = File::open(path)?;
55    let mut reader = BufReader::new(file);
56    json_from_reader(&mut reader, limits)
57}
58
/// Loads audit info from the binary loaded from an arbitrary reader, e.g. the standard input.
///
/// ```rust, ignore
/// let stdin = io::stdin();
/// let mut handle = stdin.lock();
/// // Uses the default limits: 1GiB input file size, 8MiB audit data size
/// let info = audit_info_from_reader(&mut handle, Default::default())?;
/// ```
///
/// The data is validated to only have a single root package and not contain any circular dependencies.
#[cfg(feature = "serde")]
pub fn audit_info_from_reader<T: BufRead>(
    reader: &mut T,
    limits: Limits,
) -> Result<VersionInfo, Error> {
    // Extract the JSON string first, then deserialize it as a separate step.
    let json = json_from_reader(reader, limits)?;
    let info = serde_json::from_str(&json)?;
    Ok(info)
}
76
77/// Extracts the audit data and returns the JSON string.
78/// This is useful if you want to forward the data somewhere instead of parsing it to Rust data structures.
79///
80/// If you want to obtain the Zlib-compressed data instead,
81/// use the [`auditable-extract`](https://docs.rs/auditable-extract/) crate directly.
82pub fn json_from_reader<T: BufRead>(reader: &mut T, limits: Limits) -> Result<String, Error> {
83    let compressed_data = get_compressed_audit_data(reader, limits)?;
84    let decompressed_data =
85        decompress_to_vec_zlib_with_limit(&compressed_data, limits.decompressed_json_size)
86            .map_err(DecompressError::from_miniz)?;
87    Ok(String::from_utf8(decompressed_data)?)
88}
89
90// Factored into its own function for ease of unit testing,
91// and also so that the large allocation of the input file is dropped
92// before we start decompressing the data to minimize peak memory usage
93fn get_compressed_audit_data<T: BufRead>(reader: &mut T, limits: Limits) -> Result<Vec<u8>, Error> {
94    // In case you're wondering why the check for the limit is weird like that:
95    // When .take() returns EOF, it doesn't tell you if that's because it reached the limit
96    // or because the underlying reader ran out of data.
97    // And we need to return an error when the reader is over limit, else we'll truncate the audit data.
98    // So it would be reasonable to run `into_inner()` and check if that reader has any data remaining...
99    // But readers can return EOF sporadically - a reader may return EOF,
100    // then get more data and return bytes again instead of EOF!
101    // So instead we read as many bytes as the limit allows, plus one.
102    // If we've read the limit-plus-one bytes, that means the underlying reader was at least one byte over the limit.
103    // That way we avoid any time-of-check/time-of-use issues.
104    let incremented_limit = u64::saturating_add(limits.input_file_size as u64, 1);
105    let mut f = reader.take(incremented_limit);
106    let mut input_binary = Vec::new();
107    f.read_to_end(&mut input_binary)?;
108    if input_binary.len() as u64 == incremented_limit {
109        Err(Error::InputLimitExceeded)?
110    }
111    let compressed_audit_data = raw_auditable_data(&input_binary)?;
112    if compressed_audit_data.len() > limits.decompressed_json_size {
113        Err(Error::OutputLimitExceeded)?;
114    }
115    Ok(compressed_audit_data.to_owned())
116}
117
/// The input slice should contain the entire binary.
/// This function is useful if you have already loaded the binary to memory, e.g. via memory-mapping.
#[cfg(feature = "serde")]
pub fn audit_info_from_slice(
    input_binary: &[u8],
    decompressed_json_size_limit: usize,
) -> Result<VersionInfo, Error> {
    // Extract the JSON string first, then deserialize it as a separate step.
    let json = json_from_slice(input_binary, decompressed_json_size_limit)?;
    Ok(serde_json::from_str(&json)?)
}
130
131/// The input slice should contain the entire binary.
132/// This function is useful if you have already loaded the binary to memory, e.g. via memory-mapping.
133///
134/// Returns the decompressed audit data.
135/// This is useful if you want to forward the data somewhere instead of parsing it to Rust data structures.
136///
137/// If you want to obtain the Zlib-compressed data instead,
138/// use the [`auditable-extract`](https://docs.rs/auditable-extract/) crate directly.
139pub fn json_from_slice(
140    input_binary: &[u8],
141    decompressed_json_size_limit: usize,
142) -> Result<String, Error> {
143    let compressed_audit_data = raw_auditable_data(input_binary)?;
144    if compressed_audit_data.len() > decompressed_json_size_limit {
145        Err(Error::OutputLimitExceeded)?;
146    }
147    let decompressed_data =
148        decompress_to_vec_zlib_with_limit(compressed_audit_data, decompressed_json_size_limit)
149            .map_err(DecompressError::from_miniz)?;
150    Ok(String::from_utf8(decompressed_data)?)
151}
152
/// Protects against [denial-of-service attacks](https://en.wikipedia.org/wiki/Denial-of-service_attack)
/// via infinite input streams or [zip bombs](https://en.wikipedia.org/wiki/Zip_bomb),
/// which would otherwise use up all your memory and crash your machine.
///
/// If the limit is exceeded, an error is returned and no further deserialization is attempted.
///
/// The default limits are **1 GiB** for the `input_file_size` and **8 MiB** for `decompressed_json_size`.
///
/// Note that the `decompressed_json_size` is only enforced on the level of the *serialized* JSON, i.e. a string.
/// We do not enforce that `serde_json` does not consume more memory when deserializing JSON to Rust data structures.
/// Unfortunately Rust does not provide APIs for that.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub struct Limits {
    /// Maximum number of bytes read from the input binary, in bytes.
    pub input_file_size: usize,
    /// Maximum size of the decompressed JSON audit data, in bytes.
    pub decompressed_json_size: usize,
}

impl Default for Limits {
    fn default() -> Self {
        Self {
            input_file_size: 1024 * 1024 * 1024,     // 1GiB
            decompressed_json_size: 1024 * 1024 * 8, // 8MiB
        }
    }
}
178
#[cfg(test)]
mod tests {
    use super::*;

    // An input longer than `input_file_size` must be rejected with the
    // "input file too large" error rather than silently truncated.
    #[test]
    fn input_file_limits() {
        let limits = Limits {
            input_file_size: 128,
            decompressed_json_size: 99999,
        };
        let mut reader = std::io::Cursor::new(vec![0u8; 1024]);
        let err = get_compressed_audit_data(&mut reader, limits)
            .expect_err("oversized input must be rejected");
        assert!(err.to_string().contains("The input file is too large"));
    }
}