cpp_demangle/
lib.rs

1//! This crate can parse a C++ “mangled” linker symbol name into a Rust value
2//! describing what the name refers to: a variable, a function, a virtual table,
3//! etc. The description type implements `Display`, producing human-readable
4//! text describing the mangled name. Debuggers and profilers can use this crate
5//! to provide more meaningful output.
6//!
7//! C++ requires the compiler to choose names for linker symbols consistently
8//! across compilation units, so that two compilation units that have seen the
9//! same declarations can pair up definitions in one unit with references in
10//! another.  Almost all platforms other than Microsoft Windows follow the
11//! [Itanium C++ ABI][itanium]'s rules for this.
12//!
13//! [itanium]: https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling
14//!
15//! For example, suppose a C++ compilation unit has the definition:
16//!
17//! ```c++
18//! namespace space {
19//!   int foo(int x, int y) { return x+y; }
20//! }
21//! ```
22//!
23//! The Itanium C++ ABI specifies that the linker symbol for that function must
24//! be named `_ZN5space3fooEii`. This crate can parse that name into a Rust
25//! value representing its structure. Formatting the value with the `format!`
26//! macro or the `std::string::ToString::to_string` trait method yields the
27//! string `space::foo(int, int)`, which is more meaningful to the C++
28//! developer.
29
30#![deny(missing_docs)]
31#![deny(missing_debug_implementations)]
32#![deny(unsafe_code)]
33// Clippy stuff.
34#![allow(unknown_lints)]
35#![allow(clippy::inline_always)]
36#![allow(clippy::redundant_field_names)]
37#![cfg_attr(not(feature = "std"), no_std)]
38
39#[cfg(feature = "alloc")]
40#[macro_use]
41extern crate alloc;
42
43#[cfg(not(feature = "alloc"))]
44compile_error!("`alloc` or `std` feature is required for this crate");
45
46#[macro_use]
47mod logging;
48
49pub mod ast;
50pub mod error;
51mod index_str;
52mod subs;
53
54use alloc::string::String;
55use alloc::vec::Vec;
56use ast::{Demangle, Parse, ParseContext};
57use core::fmt;
58use core::num::NonZeroU32;
59use error::{Error, Result};
60use index_str::IndexStr;
61
/// Options to control the parsing process.
///
/// `ParseOptions::default()` leaves the recursion limit unset; use the
/// builder-style setter below to override it.
#[derive(Clone, Copy, Debug, Default)]
#[repr(C)]
pub struct ParseOptions {
    // `None` until a limit is explicitly requested via `recursion_limit`.
    recursion_limit: Option<NonZeroU32>,
}

impl ParseOptions {
    /// Set the limit on recursion depth during the parsing phase.
    ///
    /// A low limit will cause valid symbols to be rejected, but a high limit
    /// may allow pathological symbols to overflow the stack during parsing.
    /// The default value is 96, which will not overflow the stack even in
    /// a debug build.
    ///
    /// # Panics
    ///
    /// Panics if `limit` is zero.
    pub fn recursion_limit(mut self, limit: u32) -> Self {
        // Validate first so the stored value is always non-zero.
        let non_zero = NonZeroU32::new(limit).expect("Recursion limit must be > 0");
        self.recursion_limit = Some(non_zero);
        self
    }
}
80
/// Options to control the demangling process.
///
/// Every flag starts out disabled and the recursion limit starts out unset;
/// each builder method consumes the options and returns the updated copy.
#[derive(Clone, Copy, Debug, Default)]
#[repr(C)]
pub struct DemangleOptions {
    no_params: bool,
    no_return_type: bool,
    hide_expression_literal_types: bool,
    // `None` until a limit is explicitly requested via `recursion_limit`.
    recursion_limit: Option<NonZeroU32>,
}

impl DemangleOptions {
    /// Construct a new `DemangleOptions` with the default values.
    pub fn new() -> Self {
        Self::default()
    }

    /// Do not display function arguments.
    pub fn no_params(self) -> Self {
        Self {
            no_params: true,
            ..self
        }
    }

    /// Do not display the function return type.
    pub fn no_return_type(self) -> Self {
        Self {
            no_return_type: true,
            ..self
        }
    }

    /// Hide type annotations in template value parameters.
    ///
    /// These are not needed to distinguish template instances, so hiding
    /// them can make it easier to match user-provided template instance
    /// names.
    pub fn hide_expression_literal_types(self) -> Self {
        Self {
            hide_expression_literal_types: true,
            ..self
        }
    }

    /// Set the limit on recursion depth during the demangling phase.
    ///
    /// A low limit will cause valid symbols to be rejected, but a high limit
    /// may allow pathological symbols to overflow the stack during
    /// demangling. The default value is 128.
    ///
    /// # Panics
    ///
    /// Panics if `limit` is zero.
    pub fn recursion_limit(self, limit: u32) -> Self {
        // Validate first so the stored value is always non-zero.
        let non_zero = NonZeroU32::new(limit).expect("Recursion limit must be > 0");
        Self {
            recursion_limit: Some(non_zero),
            ..self
        }
    }
}
127
/// A `Symbol` which owns the underlying storage for the mangled name.
///
/// The mangled bytes are held in a `Vec<u8>` inside the symbol itself.
pub type OwnedSymbol = Symbol<Vec<u8>>;

/// A `Symbol` which borrows the underlying storage for the mangled name.
///
/// The mangled bytes are referenced as a `&'a [u8]`, so the symbol cannot
/// outlive the buffer it refers to.
pub type BorrowedSymbol<'a> = Symbol<&'a [u8]>;
133
/// A mangled symbol that has been parsed into an AST.
///
/// This is generic over some storage type `T` which can be either owned or
/// borrowed. See the `OwnedSymbol` and `BorrowedSymbol` type aliases.
///
/// The `Display` implementation demangles the symbol with the default
/// `DemangleOptions`.
#[derive(Clone, Debug, PartialEq)]
pub struct Symbol<T> {
    // Storage holding the mangled bytes this symbol was parsed from; it is
    // handed to the demangling context again when the symbol is rendered.
    raw: T,
    // Substitution table passed mutably to the parser and then read-only to
    // the demangling context.
    substitutions: subs::SubstitutionTable,
    // Root of the AST returned by `ast::MangledName::parse`.
    parsed: ast::MangledName,
}
144
145impl<T> Symbol<T>
146where
147    T: AsRef<[u8]>,
148{
149    /// Given some raw storage, parse the mangled symbol from it with the default
150    /// options.
151    ///
152    /// ```
153    /// use cpp_demangle::Symbol;
154    /// use std::string::ToString;
155    ///
156    /// // First, something easy :)
157    ///
158    /// let mangled = b"_ZN5space3fooEibc";
159    ///
160    /// let sym = Symbol::new(&mangled[..])
161    ///     .expect("Could not parse mangled symbol!");
162    ///
163    /// let demangled = sym.to_string();
164    /// assert_eq!(demangled, "space::foo(int, bool, char)");
165    ///
166    /// // Now let's try something a little more complicated!
167    ///
168    /// let mangled =
169    ///     b"__Z28JS_GetPropertyDescriptorByIdP9JSContextN2JS6HandleIP8JSObjectEENS2_I4jsidEENS1_13MutableHandleINS1_18PropertyDescriptorEEE";
170    ///
171    /// let sym = Symbol::new(&mangled[..])
172    ///     .expect("Could not parse mangled symbol!");
173    ///
174    /// let demangled = sym.to_string();
175    /// assert_eq!(
176    ///     demangled,
177    ///     "JS_GetPropertyDescriptorById(JSContext*, JS::Handle<JSObject*>, JS::Handle<jsid>, JS::MutableHandle<JS::PropertyDescriptor>)"
178    /// );
179    /// ```
180    #[inline]
181    pub fn new(raw: T) -> Result<Symbol<T>> {
182        Self::new_with_options(raw, &Default::default())
183    }
184
185    /// Given some raw storage, parse the mangled symbol from it.
186    ///
187    /// ```
188    /// use cpp_demangle::{ParseOptions, Symbol};
189    /// use std::string::ToString;
190    ///
191    /// // First, something easy :)
192    ///
193    /// let mangled = b"_ZN5space3fooEibc";
194    ///
195    /// let parse_options = ParseOptions::default()
196    ///     .recursion_limit(1024);
197    ///
198    /// let sym = Symbol::new_with_options(&mangled[..], &parse_options)
199    ///     .expect("Could not parse mangled symbol!");
200    ///
201    /// let demangled = sym.to_string();
202    /// assert_eq!(demangled, "space::foo(int, bool, char)");
203    ///
204    /// // Now let's try something a little more complicated!
205    ///
206    /// let mangled =
207    ///     b"__Z28JS_GetPropertyDescriptorByIdP9JSContextN2JS6HandleIP8JSObjectEENS2_I4jsidEENS1_13MutableHandleINS1_18PropertyDescriptorEEE";
208    ///
209    /// let sym = Symbol::new(&mangled[..])
210    ///     .expect("Could not parse mangled symbol!");
211    ///
212    /// let demangled = sym.to_string();
213    /// assert_eq!(
214    ///     demangled,
215    ///     "JS_GetPropertyDescriptorById(JSContext*, JS::Handle<JSObject*>, JS::Handle<jsid>, JS::MutableHandle<JS::PropertyDescriptor>)"
216    /// );
217    /// ```
218    pub fn new_with_options(raw: T, options: &ParseOptions) -> Result<Symbol<T>> {
219        let mut substitutions = subs::SubstitutionTable::new();
220
221        let parsed = {
222            let ctx = ParseContext::new(*options);
223            let input = IndexStr::new(raw.as_ref());
224
225            let (parsed, tail) = ast::MangledName::parse(&ctx, &mut substitutions, input)?;
226            debug_assert!(ctx.recursion_level() == 0);
227
228            if tail.is_empty() {
229                parsed
230            } else {
231                return Err(Error::UnexpectedText);
232            }
233        };
234
235        let symbol = Symbol {
236            raw: raw,
237            substitutions: substitutions,
238            parsed: parsed,
239        };
240
241        log!(
242            "Successfully parsed '{}' as
243
244AST = {:#?}
245
246substitutions = {:#?}",
247            String::from_utf8_lossy(symbol.raw.as_ref()),
248            symbol.parsed,
249            symbol.substitutions
250        );
251
252        Ok(symbol)
253    }
254
255    /// Demangle the symbol and return it as a String.
256    ///
257    /// Unlike the `ToString` implementation, this function allows options to
258    /// be specified.
259    ///
260    /// ```
261    /// use cpp_demangle::{DemangleOptions, Symbol};
262    /// use std::string::ToString;
263    ///
264    /// let mangled = b"_ZN5space3fooEibc";
265    ///
266    /// let sym = Symbol::new(&mangled[..])
267    ///     .expect("Could not parse mangled symbol!");
268    ///
269    /// let demangled = sym.to_string();
270    /// let options = DemangleOptions::default();
271    /// let demangled_again = sym.demangle(&options).unwrap();
272    /// assert_eq!(demangled_again, demangled);
273    /// ```
274    #[allow(clippy::trivially_copy_pass_by_ref)]
275    pub fn demangle(
276        &self,
277        options: &DemangleOptions,
278    ) -> ::core::result::Result<String, fmt::Error> {
279        let mut out = String::new();
280        {
281            let mut ctx = ast::DemangleContext::new(
282                &self.substitutions,
283                self.raw.as_ref(),
284                *options,
285                &mut out,
286            );
287            self.parsed.demangle(&mut ctx, None)?;
288        }
289
290        Ok(out)
291    }
292
293    /// Demangle the symbol to a DemangleWrite, which lets the consumer be informed about
294    /// syntactic structure.
295    #[allow(clippy::trivially_copy_pass_by_ref)]
296    pub fn structured_demangle<W: DemangleWrite>(
297        &self,
298        out: &mut W,
299        options: &DemangleOptions,
300    ) -> fmt::Result {
301        let mut ctx =
302            ast::DemangleContext::new(&self.substitutions, self.raw.as_ref(), *options, out);
303        self.parsed.demangle(&mut ctx, None)
304    }
305}
306
/// The type of a demangled AST node.
///
/// This is only partial: not all nodes are represented. Additional values may
/// be added in the future, so always include a `_` arm when matching on this
/// enum.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum DemangleNodeType {
    /// Entering a `<prefix>` production
    Prefix,
    /// Entering a `<template-prefix>` production
    TemplatePrefix,
    /// Entering a `<template-args>` production
    TemplateArgs,
    /// Entering a `<unqualified-name>` production
    UnqualifiedName,
    /// Entering a `<template-param>` production
    TemplateParam,
    /// Entering a `<decltype>` production
    Decltype,
    /// Entering a `<data-member-prefix>` production
    DataMemberPrefix,
    /// Entering a `<nested-name>` production
    NestedName,
    /// Entering a `<special-name>` production that is a vtable.
    VirtualTable,
    /// Additional values may be added in the future. Use a
    /// `_` pattern for compatibility.
    // Sentinel only: kept out of the rendered docs so it is not mistaken for
    // a real node type.
    #[doc(hidden)]
    __NonExhaustive,
}
333
334/// Sink for demangled text that reports syntactic structure.
335pub trait DemangleWrite {
336    /// Called when we are entering the scope of some AST node.
337    fn push_demangle_node(&mut self, _: DemangleNodeType) {}
338    /// Same as `fmt::Write::write_str`.
339    fn write_string(&mut self, s: &str) -> fmt::Result;
340    /// Called when we are exiting the scope of some AST node for
341    /// which `push_demangle_node` was called.
342    fn pop_demangle_node(&mut self) {}
343}
344
345impl<W: fmt::Write> DemangleWrite for W {
346    fn write_string(&mut self, s: &str) -> fmt::Result {
347        fmt::Write::write_str(self, s)
348    }
349}
350
351impl<'a, T> Symbol<&'a T>
352where
353    T: AsRef<[u8]> + ?Sized,
354{
355    /// Parse a mangled symbol from input and return it and the trailing tail of
356    /// bytes that come after the symbol, with the default options.
357    ///
358    /// While `Symbol::new` will return an error if there is unexpected trailing
359    /// bytes, `with_tail` simply returns the trailing bytes along with the
360    /// parsed symbol.
361    ///
362    /// ```
363    /// use cpp_demangle::BorrowedSymbol;
364    /// use std::string::ToString;
365    ///
366    /// let mangled = b"_ZN5space3fooEibc and some trailing junk";
367    ///
368    /// let (sym, tail) = BorrowedSymbol::with_tail(&mangled[..])
369    ///     .expect("Could not parse mangled symbol!");
370    ///
371    /// assert_eq!(tail, b" and some trailing junk");
372    ///
373    /// let demangled = sym.to_string();
374    /// assert_eq!(demangled, "space::foo(int, bool, char)");
375    /// ```
376    #[inline]
377    pub fn with_tail(input: &'a T) -> Result<(BorrowedSymbol<'a>, &'a [u8])> {
378        Self::with_tail_and_options(input, &Default::default())
379    }
380
381    /// Parse a mangled symbol from input and return it and the trailing tail of
382    /// bytes that come after the symbol.
383    ///
384    /// While `Symbol::new_with_options` will return an error if there is
385    /// unexpected trailing bytes, `with_tail_and_options` simply returns the
386    /// trailing bytes along with the parsed symbol.
387    ///
388    /// ```
389    /// use cpp_demangle::{BorrowedSymbol, ParseOptions};
390    /// use std::string::ToString;
391    ///
392    /// let mangled = b"_ZN5space3fooEibc and some trailing junk";
393    ///
394    /// let parse_options = ParseOptions::default()
395    ///     .recursion_limit(1024);
396    ///
397    /// let (sym, tail) = BorrowedSymbol::with_tail_and_options(&mangled[..], &parse_options)
398    ///     .expect("Could not parse mangled symbol!");
399    ///
400    /// assert_eq!(tail, b" and some trailing junk");
401    ///
402    /// let demangled = sym.to_string();
403    /// assert_eq!(demangled, "space::foo(int, bool, char)");
404    /// ```
405    pub fn with_tail_and_options(
406        input: &'a T,
407        options: &ParseOptions,
408    ) -> Result<(BorrowedSymbol<'a>, &'a [u8])> {
409        let mut substitutions = subs::SubstitutionTable::new();
410
411        let ctx = ParseContext::new(*options);
412        let idx_str = IndexStr::new(input.as_ref());
413        let (parsed, tail) = ast::MangledName::parse(&ctx, &mut substitutions, idx_str)?;
414        debug_assert!(ctx.recursion_level() == 0);
415
416        let symbol = Symbol {
417            raw: input.as_ref(),
418            substitutions: substitutions,
419            parsed: parsed,
420        };
421
422        log!(
423            "Successfully parsed '{}' as
424
425AST = {:#?}
426
427substitutions = {:#?}",
428            String::from_utf8_lossy(symbol.raw),
429            symbol.parsed,
430            symbol.substitutions
431        );
432
433        Ok((symbol, tail.into()))
434    }
435}
436
437impl<T> fmt::Display for Symbol<T>
438where
439    T: AsRef<[u8]>,
440{
441    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
442        let mut out = String::new();
443        {
444            let options = DemangleOptions::default();
445            let mut ctx = ast::DemangleContext::new(
446                &self.substitutions,
447                self.raw.as_ref(),
448                options,
449                &mut out,
450            );
451            self.parsed.demangle(&mut ctx, None).map_err(|err| {
452                log!("Demangling error: {:#?}", err);
453                fmt::Error
454            })?;
455        }
456        write!(f, "{}", &out)
457    }
458}