cpp_demangle/lib.rs
1//! This crate can parse a C++ “mangled” linker symbol name into a Rust value
2//! describing what the name refers to: a variable, a function, a virtual table,
3//! etc. The description type implements `Display`, producing human-readable
4//! text describing the mangled name. Debuggers and profilers can use this crate
5//! to provide more meaningful output.
6//!
7//! C++ requires the compiler to choose names for linker symbols consistently
8//! across compilation units, so that two compilation units that have seen the
9//! same declarations can pair up definitions in one unit with references in
10//! another. Almost all platforms other than Microsoft Windows follow the
11//! [Itanium C++ ABI][itanium]'s rules for this.
12//!
13//! [itanium]: https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling
14//!
15//! For example, suppose a C++ compilation unit has the definition:
16//!
17//! ```c++
18//! namespace space {
19//! int foo(int x, int y) { return x+y; }
20//! }
21//! ```
22//!
23//! The Itanium C++ ABI specifies that the linker symbol for that function must
24//! be named `_ZN5space3fooEii`. This crate can parse that name into a Rust
25//! value representing its structure. Formatting the value with the `format!`
26//! macro or the `std::string::ToString::to_string` trait method yields the
27//! string `space::foo(int, int)`, which is more meaningful to the C++
28//! developer.
29
30#![deny(missing_docs)]
31#![deny(missing_debug_implementations)]
32#![deny(unsafe_code)]
33// Clippy stuff.
34#![allow(unknown_lints)]
35#![allow(clippy::inline_always)]
36#![allow(clippy::redundant_field_names)]
37#![cfg_attr(not(feature = "std"), no_std)]
38
39#[cfg(feature = "alloc")]
40#[macro_use]
41extern crate alloc;
42
43#[cfg(not(feature = "alloc"))]
44compile_error!("`alloc` or `std` feature is required for this crate");
45
46#[macro_use]
47mod logging;
48
49pub mod ast;
50pub mod error;
51mod index_str;
52mod subs;
53
54use alloc::string::String;
55use alloc::vec::Vec;
56use ast::{Demangle, Parse, ParseContext};
57use core::fmt;
58use core::num::NonZeroU32;
59use error::{Error, Result};
60use index_str::IndexStr;
61
/// Options to control the parsing process.
///
/// Build a value with `ParseOptions::new()` (or `Default::default()`) and
/// chain the builder methods. Every builder method takes the options by
/// value and returns the updated copy, so the returned value must be used.
#[derive(Clone, Copy, Debug, Default)]
#[repr(C)]
pub struct ParseOptions {
    /// Limit set through `recursion_limit`, or `None` if the caller never
    /// set one.
    recursion_limit: Option<NonZeroU32>,
}

impl ParseOptions {
    /// Construct a new `ParseOptions` with the default values.
    ///
    /// Equivalent to `ParseOptions::default()`; provided so that parse
    /// options can be built the same way as `DemangleOptions`.
    #[must_use]
    pub fn new() -> Self {
        Default::default()
    }

    /// Set the limit on recursion depth during the parsing phase. A low
    /// limit will cause valid symbols to be rejected, but a high limit may
    /// allow pathological symbols to overflow the stack during parsing.
    /// The default value is 96, which will not overflow the stack even in
    /// a debug build.
    ///
    /// # Panics
    ///
    /// Panics if `limit` is zero.
    #[must_use]
    pub fn recursion_limit(mut self, limit: u32) -> Self {
        // A zero limit is a caller bug, so it is rejected loudly instead of
        // being stored.
        let limit = NonZeroU32::new(limit).expect("Recursion limit must be > 0");
        self.recursion_limit = Some(limit);
        self
    }
}
80
/// Options to control the demangling process.
///
/// A default-constructed value has every flag cleared and no recursion
/// limit set. Every builder method takes the options by value and returns
/// the updated copy, so the returned value must be used.
#[derive(Clone, Copy, Debug, Default)]
#[repr(C)]
pub struct DemangleOptions {
    /// Set by `no_params`.
    no_params: bool,
    /// Set by `no_return_type`.
    no_return_type: bool,
    /// Set by `hide_expression_literal_types`.
    hide_expression_literal_types: bool,
    /// Limit set through `recursion_limit`, or `None` if the caller never
    /// set one.
    recursion_limit: Option<NonZeroU32>,
}

impl DemangleOptions {
    /// Construct a new `DemangleOptions` with the default values.
    #[must_use]
    pub fn new() -> Self {
        Default::default()
    }

    /// Do not display function arguments.
    #[must_use]
    pub fn no_params(mut self) -> Self {
        self.no_params = true;
        self
    }

    /// Do not display the function return type.
    #[must_use]
    pub fn no_return_type(mut self) -> Self {
        self.no_return_type = true;
        self
    }

    /// Hide type annotations in template value parameters.
    /// These are not needed to distinguish template instances
    /// so this can make it easier to match user-provided
    /// template instance names.
    #[must_use]
    pub fn hide_expression_literal_types(mut self) -> Self {
        self.hide_expression_literal_types = true;
        self
    }

    /// Set the limit on recursion depth during the demangling phase. A low
    /// limit will cause valid symbols to be rejected, but a high limit may
    /// allow pathological symbols to overflow the stack during demangling.
    /// The default value is 128.
    ///
    /// # Panics
    ///
    /// Panics if `limit` is zero.
    #[must_use]
    pub fn recursion_limit(mut self, limit: u32) -> Self {
        // A zero limit is a caller bug, so it is rejected loudly instead of
        // being stored.
        let limit = NonZeroU32::new(limit).expect("Recursion limit must be > 0");
        self.recursion_limit = Some(limit);
        self
    }
}
127
128/// A `Symbol` which owns the underlying storage for the mangled name.
129pub type OwnedSymbol = Symbol<Vec<u8>>;
130
131/// A `Symbol` which borrows the underlying storage for the mangled name.
132pub type BorrowedSymbol<'a> = Symbol<&'a [u8]>;
133
134/// A mangled symbol that has been parsed into an AST.
135///
136/// This is generic over some storage type `T` which can be either owned or
137/// borrowed. See the `OwnedSymbol` and `BorrowedSymbol` type aliases.
138#[derive(Clone, Debug, PartialEq)]
139pub struct Symbol<T> {
140 raw: T,
141 substitutions: subs::SubstitutionTable,
142 parsed: ast::MangledName,
143}
144
145impl<T> Symbol<T>
146where
147 T: AsRef<[u8]>,
148{
149 /// Given some raw storage, parse the mangled symbol from it with the default
150 /// options.
151 ///
152 /// ```
153 /// use cpp_demangle::Symbol;
154 /// use std::string::ToString;
155 ///
156 /// // First, something easy :)
157 ///
158 /// let mangled = b"_ZN5space3fooEibc";
159 ///
160 /// let sym = Symbol::new(&mangled[..])
161 /// .expect("Could not parse mangled symbol!");
162 ///
163 /// let demangled = sym.to_string();
164 /// assert_eq!(demangled, "space::foo(int, bool, char)");
165 ///
166 /// // Now let's try something a little more complicated!
167 ///
168 /// let mangled =
169 /// b"__Z28JS_GetPropertyDescriptorByIdP9JSContextN2JS6HandleIP8JSObjectEENS2_I4jsidEENS1_13MutableHandleINS1_18PropertyDescriptorEEE";
170 ///
171 /// let sym = Symbol::new(&mangled[..])
172 /// .expect("Could not parse mangled symbol!");
173 ///
174 /// let demangled = sym.to_string();
175 /// assert_eq!(
176 /// demangled,
177 /// "JS_GetPropertyDescriptorById(JSContext*, JS::Handle<JSObject*>, JS::Handle<jsid>, JS::MutableHandle<JS::PropertyDescriptor>)"
178 /// );
179 /// ```
180 #[inline]
181 pub fn new(raw: T) -> Result<Symbol<T>> {
182 Self::new_with_options(raw, &Default::default())
183 }
184
185 /// Given some raw storage, parse the mangled symbol from it.
186 ///
187 /// ```
188 /// use cpp_demangle::{ParseOptions, Symbol};
189 /// use std::string::ToString;
190 ///
191 /// // First, something easy :)
192 ///
193 /// let mangled = b"_ZN5space3fooEibc";
194 ///
195 /// let parse_options = ParseOptions::default()
196 /// .recursion_limit(1024);
197 ///
198 /// let sym = Symbol::new_with_options(&mangled[..], &parse_options)
199 /// .expect("Could not parse mangled symbol!");
200 ///
201 /// let demangled = sym.to_string();
202 /// assert_eq!(demangled, "space::foo(int, bool, char)");
203 ///
204 /// // Now let's try something a little more complicated!
205 ///
206 /// let mangled =
207 /// b"__Z28JS_GetPropertyDescriptorByIdP9JSContextN2JS6HandleIP8JSObjectEENS2_I4jsidEENS1_13MutableHandleINS1_18PropertyDescriptorEEE";
208 ///
209 /// let sym = Symbol::new(&mangled[..])
210 /// .expect("Could not parse mangled symbol!");
211 ///
212 /// let demangled = sym.to_string();
213 /// assert_eq!(
214 /// demangled,
215 /// "JS_GetPropertyDescriptorById(JSContext*, JS::Handle<JSObject*>, JS::Handle<jsid>, JS::MutableHandle<JS::PropertyDescriptor>)"
216 /// );
217 /// ```
218 pub fn new_with_options(raw: T, options: &ParseOptions) -> Result<Symbol<T>> {
219 let mut substitutions = subs::SubstitutionTable::new();
220
221 let parsed = {
222 let ctx = ParseContext::new(*options);
223 let input = IndexStr::new(raw.as_ref());
224
225 let (parsed, tail) = ast::MangledName::parse(&ctx, &mut substitutions, input)?;
226 debug_assert!(ctx.recursion_level() == 0);
227
228 if tail.is_empty() {
229 parsed
230 } else {
231 return Err(Error::UnexpectedText);
232 }
233 };
234
235 let symbol = Symbol {
236 raw: raw,
237 substitutions: substitutions,
238 parsed: parsed,
239 };
240
241 log!(
242 "Successfully parsed '{}' as
243
244AST = {:#?}
245
246substitutions = {:#?}",
247 String::from_utf8_lossy(symbol.raw.as_ref()),
248 symbol.parsed,
249 symbol.substitutions
250 );
251
252 Ok(symbol)
253 }
254
255 /// Demangle the symbol and return it as a String.
256 ///
257 /// Unlike the `ToString` implementation, this function allows options to
258 /// be specified.
259 ///
260 /// ```
261 /// use cpp_demangle::{DemangleOptions, Symbol};
262 /// use std::string::ToString;
263 ///
264 /// let mangled = b"_ZN5space3fooEibc";
265 ///
266 /// let sym = Symbol::new(&mangled[..])
267 /// .expect("Could not parse mangled symbol!");
268 ///
269 /// let demangled = sym.to_string();
270 /// let options = DemangleOptions::default();
271 /// let demangled_again = sym.demangle(&options).unwrap();
272 /// assert_eq!(demangled_again, demangled);
273 /// ```
274 #[allow(clippy::trivially_copy_pass_by_ref)]
275 pub fn demangle(
276 &self,
277 options: &DemangleOptions,
278 ) -> ::core::result::Result<String, fmt::Error> {
279 let mut out = String::new();
280 {
281 let mut ctx = ast::DemangleContext::new(
282 &self.substitutions,
283 self.raw.as_ref(),
284 *options,
285 &mut out,
286 );
287 self.parsed.demangle(&mut ctx, None)?;
288 }
289
290 Ok(out)
291 }
292
293 /// Demangle the symbol to a DemangleWrite, which lets the consumer be informed about
294 /// syntactic structure.
295 #[allow(clippy::trivially_copy_pass_by_ref)]
296 pub fn structured_demangle<W: DemangleWrite>(
297 &self,
298 out: &mut W,
299 options: &DemangleOptions,
300 ) -> fmt::Result {
301 let mut ctx =
302 ast::DemangleContext::new(&self.substitutions, self.raw.as_ref(), *options, out);
303 self.parsed.demangle(&mut ctx, None)
304 }
305}
306
/// The kind of AST node being demangled.
///
/// Only some node kinds are represented here; the list is partial.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum DemangleNodeType {
    /// Entering a `<prefix>` production.
    Prefix,
    /// Entering a `<template-prefix>` production.
    TemplatePrefix,
    /// Entering a `<template-args>` production.
    TemplateArgs,
    /// Entering an `<unqualified-name>` production.
    UnqualifiedName,
    /// Entering a `<template-param>` production.
    TemplateParam,
    /// Entering a `<decltype>` production.
    Decltype,
    /// Entering a `<data-member-prefix>` production.
    DataMemberPrefix,
    /// Entering a `<nested-name>` production.
    NestedName,
    /// Entering a `<special-name>` production that is a vtable.
    VirtualTable,
    /// More variants may be added in the future, so always include a
    /// `_` arm when matching to stay compatible.
    __NonExhaustive,
}
333
334/// Sink for demangled text that reports syntactic structure.
335pub trait DemangleWrite {
336 /// Called when we are entering the scope of some AST node.
337 fn push_demangle_node(&mut self, _: DemangleNodeType) {}
338 /// Same as `fmt::Write::write_str`.
339 fn write_string(&mut self, s: &str) -> fmt::Result;
340 /// Called when we are exiting the scope of some AST node for
341 /// which `push_demangle_node` was called.
342 fn pop_demangle_node(&mut self) {}
343}
344
345impl<W: fmt::Write> DemangleWrite for W {
346 fn write_string(&mut self, s: &str) -> fmt::Result {
347 fmt::Write::write_str(self, s)
348 }
349}
350
351impl<'a, T> Symbol<&'a T>
352where
353 T: AsRef<[u8]> + ?Sized,
354{
355 /// Parse a mangled symbol from input and return it and the trailing tail of
356 /// bytes that come after the symbol, with the default options.
357 ///
358 /// While `Symbol::new` will return an error if there is unexpected trailing
359 /// bytes, `with_tail` simply returns the trailing bytes along with the
360 /// parsed symbol.
361 ///
362 /// ```
363 /// use cpp_demangle::BorrowedSymbol;
364 /// use std::string::ToString;
365 ///
366 /// let mangled = b"_ZN5space3fooEibc and some trailing junk";
367 ///
368 /// let (sym, tail) = BorrowedSymbol::with_tail(&mangled[..])
369 /// .expect("Could not parse mangled symbol!");
370 ///
371 /// assert_eq!(tail, b" and some trailing junk");
372 ///
373 /// let demangled = sym.to_string();
374 /// assert_eq!(demangled, "space::foo(int, bool, char)");
375 /// ```
376 #[inline]
377 pub fn with_tail(input: &'a T) -> Result<(BorrowedSymbol<'a>, &'a [u8])> {
378 Self::with_tail_and_options(input, &Default::default())
379 }
380
381 /// Parse a mangled symbol from input and return it and the trailing tail of
382 /// bytes that come after the symbol.
383 ///
384 /// While `Symbol::new_with_options` will return an error if there is
385 /// unexpected trailing bytes, `with_tail_and_options` simply returns the
386 /// trailing bytes along with the parsed symbol.
387 ///
388 /// ```
389 /// use cpp_demangle::{BorrowedSymbol, ParseOptions};
390 /// use std::string::ToString;
391 ///
392 /// let mangled = b"_ZN5space3fooEibc and some trailing junk";
393 ///
394 /// let parse_options = ParseOptions::default()
395 /// .recursion_limit(1024);
396 ///
397 /// let (sym, tail) = BorrowedSymbol::with_tail_and_options(&mangled[..], &parse_options)
398 /// .expect("Could not parse mangled symbol!");
399 ///
400 /// assert_eq!(tail, b" and some trailing junk");
401 ///
402 /// let demangled = sym.to_string();
403 /// assert_eq!(demangled, "space::foo(int, bool, char)");
404 /// ```
405 pub fn with_tail_and_options(
406 input: &'a T,
407 options: &ParseOptions,
408 ) -> Result<(BorrowedSymbol<'a>, &'a [u8])> {
409 let mut substitutions = subs::SubstitutionTable::new();
410
411 let ctx = ParseContext::new(*options);
412 let idx_str = IndexStr::new(input.as_ref());
413 let (parsed, tail) = ast::MangledName::parse(&ctx, &mut substitutions, idx_str)?;
414 debug_assert!(ctx.recursion_level() == 0);
415
416 let symbol = Symbol {
417 raw: input.as_ref(),
418 substitutions: substitutions,
419 parsed: parsed,
420 };
421
422 log!(
423 "Successfully parsed '{}' as
424
425AST = {:#?}
426
427substitutions = {:#?}",
428 String::from_utf8_lossy(symbol.raw),
429 symbol.parsed,
430 symbol.substitutions
431 );
432
433 Ok((symbol, tail.into()))
434 }
435}
436
437impl<T> fmt::Display for Symbol<T>
438where
439 T: AsRef<[u8]>,
440{
441 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
442 let mut out = String::new();
443 {
444 let options = DemangleOptions::default();
445 let mut ctx = ast::DemangleContext::new(
446 &self.substitutions,
447 self.raw.as_ref(),
448 options,
449 &mut out,
450 );
451 self.parsed.demangle(&mut ctx, None).map_err(|err| {
452 log!("Demangling error: {:#?}", err);
453 fmt::Error
454 })?;
455 }
456 write!(f, "{}", &out)
457 }
458}