symbolic_demangle/
lib.rs

1//! Demangling support for various languages and compilers.
2//!
3//! Currently supported languages are:
4//!
5//! - C++ (GCC-style compilers and MSVC) (`features = ["cpp", "msvc"]`)
6//! - Rust (both `legacy` and `v0`) (`features = ["rust"]`)
7//! - Swift (up to Swift 5.3) (`features = ["swift"]`)
8//! - ObjC (only symbol detection)
9//!
10//! As the demangling schemes for the languages are different, the supported demangling features are
11//! inconsistent. For example, argument types were not encoded in legacy Rust mangling and thus not
12//! available in demangled names.
13//! The demangling results should not be considered stable, and may change over time as more
14//! demangling features are added.
15//!
16//! This module is part of the `symbolic` crate and can be enabled via the `demangle` feature.
17//!
18//! # Examples
19//!
20//! ```rust
21//! # #[cfg(feature = "rust")] {
22//! use symbolic_common::{Language, Name};
23//! use symbolic_demangle::{Demangle, DemangleOptions};
24//!
25//! let name = Name::from("__ZN3std2io4Read11read_to_end17hb85a0f6802e14499E");
26//! assert_eq!(name.detect_language(), Language::Rust);
27//! assert_eq!(
28//!     name.try_demangle(DemangleOptions::complete()),
29//!     "std::io::Read::read_to_end"
30//! );
31//! # }
32//! ```
33
34#![warn(missing_docs)]
35
36use std::borrow::Cow;
37#[cfg(feature = "swift")]
38use std::ffi::{CStr, CString};
39#[cfg(feature = "swift")]
40use std::os::raw::{c_char, c_int};
41
42use symbolic_common::{Language, Name, NameMangling};
43
// Bit flags combined into the `features` argument of `symbolic_demangle_swift`.

/// Bit set in `features` when `DemangleOptions::return_type` is enabled.
#[cfg(feature = "swift")]
const SYMBOLIC_SWIFT_FEATURE_RETURN_TYPE: c_int = 0x1;
/// Bit set in `features` when `DemangleOptions::parameters` is enabled.
#[cfg(feature = "swift")]
const SYMBOLIC_SWIFT_FEATURE_PARAMETERS: c_int = 0x2;
48
// Entry points of the native Swift demangler shim; only linked with the `swift` feature.
#[cfg(feature = "swift")]
extern "C" {
    /// Demangles the NUL-terminated symbol `sym` into `buf`, which holds `buf_len` bytes.
    ///
    /// `features` is a bitwise OR of the `SYMBOLIC_SWIFT_FEATURE_*` constants. The caller in this
    /// file treats a return value of `0` as failure and any other value as success, after which
    /// it reads `buf` as a C string.
    ///
    /// NOTE(review): presumably the native side always NUL-terminates the output within
    /// `buf_len` bytes on success — confirm against the shim's implementation.
    fn symbolic_demangle_swift(
        sym: *const c_char,
        buf: *mut c_char,
        buf_len: usize,
        features: c_int,
    ) -> c_int;

    /// Checks whether the NUL-terminated symbol `sym` is a Swift symbol.
    ///
    /// The caller in this file interprets any non-zero return value as "yes".
    fn symbolic_demangle_is_swift_symbol(sym: *const c_char) -> c_int;
}
60
/// Options for [`Demangle::demangle`].
///
/// One can choose between complete or name-only demangling, and toggle specific demangling
/// features explicitly.
///
/// The resulting output depends very much on the language of the mangled [`Name`], and may change
/// over time as more fine-grained demangling options and features are added. Not all options are
/// fully supported by each language, and not every feature is mutually exclusive on all languages.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "swift")] {
/// use symbolic_common::{Name, NameMangling, Language};
/// use symbolic_demangle::{Demangle, DemangleOptions};
///
/// let symbol = Name::new("$s8mangling12GenericUnionO3FooyACyxGSicAEmlF", NameMangling::Mangled, Language::Swift);
///
/// let simple = symbol.demangle(DemangleOptions::name_only()).unwrap();
/// assert_eq!(&simple, "GenericUnion.Foo<A>");
///
/// let full = symbol.demangle(DemangleOptions::complete()).unwrap();
/// assert_eq!(&full, "mangling.GenericUnion.Foo<A>(mangling.GenericUnion<A>.Type) -> (Swift.Int) -> mangling.GenericUnion<A>");
/// # }
/// ```
///
/// [`Demangle::demangle`]: trait.Demangle.html#tymethod.demangle
#[derive(Clone, Copy, Debug)]
pub struct DemangleOptions {
    // Whether a function's return type should be part of the demangled output.
    return_type: bool,
    // Whether function argument types should be part of the demangled output.
    parameters: bool,
}
93
94impl DemangleOptions {
95    /// DemangleOptions that output a complete verbose demangling.
96    pub const fn complete() -> Self {
97        Self {
98            return_type: true,
99            parameters: true,
100        }
101    }
102
103    /// DemangleOptions that output the most simple (likely name-only) demangling.
104    pub const fn name_only() -> Self {
105        Self {
106            return_type: false,
107            parameters: false,
108        }
109    }
110
111    /// Determines whether a functions return type should be demangled.
112    pub const fn return_type(mut self, return_type: bool) -> Self {
113        self.return_type = return_type;
114        self
115    }
116
117    /// Determines whether function argument types should be demangled.
118    pub const fn parameters(mut self, parameters: bool) -> Self {
119        self.parameters = parameters;
120        self
121    }
122}
123
/// Returns `true` if `ident` looks like an Objective-C method name.
///
/// That is, it starts with `-[` or `+[` and ends with `]`.
fn is_maybe_objc(ident: &str) -> bool {
    let has_method_prefix = matches!(ident.as_bytes(), [b'-' | b'+', b'[', ..]);
    has_method_prefix && ident.ends_with(']')
}
127
/// Returns `true` if `ident` starts with a `Z` preceded by one to four underscores.
fn is_maybe_cpp(ident: &str) -> bool {
    const PREFIXES: [&str; 4] = ["_Z", "__Z", "___Z", "____Z"];

    PREFIXES.iter().any(|prefix| ident.starts_with(prefix))
}
134
/// Returns `true` if `ident` starts with `?`, optionally preceded by a single `@`.
fn is_maybe_msvc(ident: &str) -> bool {
    // Drop at most one leading `@` so that both `?...` and `@?...` are accepted.
    let unprefixed = ident.strip_prefix('@').unwrap_or(ident);
    unprefixed.starts_with('?')
}
138
/// Returns `true` if `ident` has the shape of an MD5 mangled name.
///
/// Such a name is exactly 36 bytes long: the prefix `??@`, 32 hex digits, and the suffix `@`.
fn is_maybe_md5(ident: &str) -> bool {
    const MD5_NAME_LEN: usize = 36;

    ident.len() == MD5_NAME_LEN
        && ident
            .strip_prefix("??@")
            .and_then(|rest| rest.strip_suffix('@'))
            .map_or(false, |digest| {
                digest.bytes().all(|byte| byte.is_ascii_hexdigit())
            })
}
150
/// Asks the native Swift demangler whether `ident` is a Swift symbol.
///
/// Identifiers containing interior NUL bytes cannot be converted to a C string and are reported
/// as not being Swift symbols.
#[cfg(feature = "swift")]
fn is_maybe_swift(ident: &str) -> bool {
    match CString::new(ident) {
        // SAFETY: `sym` is a valid, NUL-terminated C string that outlives the call.
        Ok(sym) => unsafe { symbolic_demangle_is_swift_symbol(sym.as_ptr()) != 0 },
        Err(_) => false,
    }
}
157
/// Fallback used when the `swift` feature is disabled: no identifier is considered Swift.
#[cfg(not(feature = "swift"))]
fn is_maybe_swift(_: &str) -> bool {
    false
}
162
/// Demangles an MSVC-style identifier with `msvc_demangler`.
///
/// Return types are suppressed unless `opts.return_type` is set, and only the name is emitted
/// unless `opts.parameters` is set. Returns `None` if the demangler reports an error.
#[cfg(feature = "msvc")]
fn try_demangle_msvc(ident: &str, opts: DemangleOptions) -> Option<String> {
    use msvc_demangler::DemangleFlags as MsvcFlags;

    // The flags are bitflags, so they are combined with `|`.
    let base = MsvcFlags::COMPLETE
        | MsvcFlags::SPACE_AFTER_COMMA
        | MsvcFlags::HUG_TYPE
        | MsvcFlags::NO_MS_KEYWORDS
        | MsvcFlags::NO_CLASS_TYPE;

    let with_return_pref = if opts.return_type {
        base
    } else {
        base | MsvcFlags::NO_FUNCTION_RETURNS
    };

    let flags = if opts.parameters {
        with_return_pref
    } else {
        // A `NO_ARGUMENTS` flag is there in the code, but commented out.
        with_return_pref | MsvcFlags::NAME_ONLY
    };

    msvc_demangler::demangle(ident, flags).ok()
}
183
184#[cfg(not(feature = "msvc"))]
185fn try_demangle_msvc(_ident: &str, _opts: DemangleOptions) -> Option<String> {
186    None
187}
188
/// Strips a trailing `$` followed by exactly 32 hex digits from `ident`.
///
/// If `ident` does not end in such a suffix, it is returned unchanged.
fn strip_hash_suffix(ident: &str) -> &str {
    // One byte for the `$` marker plus 32 hex digits.
    const SUFFIX_LEN: usize = 33;

    let split = match ident.len().checked_sub(SUFFIX_LEN) {
        Some(split) => split,
        None => return ident,
    };

    // Work on bytes: `$` and hex digits are ASCII, so any multibyte character inside the last
    // 33 bytes fails the checks below and leaves the input untouched.
    let tail = &ident.as_bytes()[split..];
    let (marker, digest) = (tail[0], &tail[1..]);

    if marker == b'$' && digest.iter().all(u8::is_ascii_hexdigit) {
        // `split` points at the ASCII `$`, which is always a char boundary.
        &ident[..split]
    } else {
        ident
    }
}
210
/// A `String` sink for `std::fmt::Write` that refuses to grow beyond a fixed number of bytes.
struct BoundedString {
    // Text accepted so far.
    inner: String,
    // Maximum total length of `inner`, in bytes.
    limit: usize,
}

impl BoundedString {
    /// Creates an empty buffer that accepts at most `bound` bytes in total.
    fn new(bound: usize) -> Self {
        let inner = String::new();
        Self {
            inner,
            limit: bound,
        }
    }

    /// Consumes the buffer and returns the text written so far.
    pub fn into_inner(self) -> String {
        let Self { inner, .. } = self;
        inner
    }
}

impl std::fmt::Write for BoundedString {
    /// Appends `s` if the result still fits within the limit; otherwise fails without writing.
    fn write_str(&mut self, s: &str) -> std::fmt::Result {
        let projected_len = self.inner.len().saturating_add(s.len());
        if projected_len <= self.limit {
            self.inner.push_str(s);
            Ok(())
        } else {
            Err(std::fmt::Error)
        }
    }
}
237
238fn try_demangle_cpp(ident: &str, opts: DemangleOptions) -> Option<String> {
239    if is_maybe_msvc(ident) {
240        return try_demangle_msvc(ident, opts);
241    }
242
243    // C++ *symbols* will always start with a `_Z` prefix, but `cpp_demangle` is a bit more lenient
244    // and will also demangle bare types, turning `a` into `signed char` for example. So lets be
245    // a bit stricter and make sure we always have a `_Z` prefix.
246    if !is_maybe_cpp(ident) {
247        return None;
248    }
249
250    #[cfg(feature = "cpp")]
251    {
252        use cpp_demangle::{DemangleOptions as CppOptions, ParseOptions, Symbol as CppSymbol};
253
254        let stripped = strip_hash_suffix(ident);
255
256        let parse_options = ParseOptions::default().recursion_limit(160); // default is 96
257        let symbol = match CppSymbol::new_with_options(stripped, &parse_options) {
258            Ok(symbol) => symbol,
259            Err(_) => return None,
260        };
261
262        let mut cpp_options = CppOptions::new().recursion_limit(192); // default is 128
263        if !opts.parameters {
264            cpp_options = cpp_options.no_params();
265        }
266        if !opts.return_type {
267            cpp_options = cpp_options.no_return_type();
268        }
269
270        // Bound the maximum output string, as a huge number of substitutions could potentially
271        // lead to a "Billion laughs attack".
272        let mut buf = BoundedString::new(4096);
273
274        symbol
275            .structured_demangle(&mut buf, &cpp_options)
276            .ok()
277            .map(|_| buf.into_inner())
278    }
279    #[cfg(not(feature = "cpp"))]
280    {
281        None
282    }
283}
284
/// Demangles a Rust identifier with `rustc_demangle`, using its alternate (`{:#}`) formatting.
///
/// The demangling options are currently ignored. Returns `None` if `rustc_demangle` rejects the
/// identifier.
#[cfg(feature = "rust")]
fn try_demangle_rust(ident: &str, _opts: DemangleOptions) -> Option<String> {
    rustc_demangle::try_demangle(ident)
        .ok()
        .map(|demangled| format!("{demangled:#}"))
}
292
293#[cfg(not(feature = "rust"))]
294fn try_demangle_rust(_ident: &str, _opts: DemangleOptions) -> Option<String> {
295    None
296}
297
/// Demangles a Swift identifier through the native `symbolic_demangle_swift` shim.
///
/// Returns `None` if `ident` contains an interior NUL byte or if the shim returns `0`.
/// The output is read from a 4096-byte buffer and converted lossily to UTF-8.
#[cfg(feature = "swift")]
fn try_demangle_swift(ident: &str, opts: DemangleOptions) -> Option<String> {
    // Identifiers with interior NUL bytes cannot be represented as C strings.
    let sym = CString::new(ident).ok()?;

    let return_bits = if opts.return_type {
        SYMBOLIC_SWIFT_FEATURE_RETURN_TYPE
    } else {
        0
    };
    let parameter_bits = if opts.parameters {
        SYMBOLIC_SWIFT_FEATURE_PARAMETERS
    } else {
        0
    };
    let features = return_bits | parameter_bits;

    let mut buf: Vec<c_char> = vec![0; 4096];

    // SAFETY: `sym` is a valid NUL-terminated C string and `buf` is a writable allocation of
    // exactly `buf.len()` bytes; both outlive the call.
    let status =
        unsafe { symbolic_demangle_swift(sym.as_ptr(), buf.as_mut_ptr(), buf.len(), features) };
    if status == 0 {
        return None;
    }

    // SAFETY: `buf` is still alive and was zero-initialized.
    // NOTE(review): this assumes the shim leaves a NUL terminator within `buf` on success —
    // confirm against the native implementation.
    let demangled = unsafe { CStr::from_ptr(buf.as_ptr()) };
    Some(demangled.to_string_lossy().into_owned())
}
321
322#[cfg(not(feature = "swift"))]
323fn try_demangle_swift(_ident: &str, _opts: DemangleOptions) -> Option<String> {
324    None
325}
326
327fn demangle_objc(ident: &str, _opts: DemangleOptions) -> String {
328    ident.to_string()
329}
330
331fn try_demangle_objcpp(ident: &str, opts: DemangleOptions) -> Option<String> {
332    if is_maybe_objc(ident) {
333        Some(demangle_objc(ident, opts))
334    } else if is_maybe_cpp(ident) {
335        try_demangle_cpp(ident, opts)
336    } else {
337        None
338    }
339}
340
/// An extension trait on `Name` for demangling names.
///
/// See the [module level documentation] for a list of supported languages.
///
/// [module level documentation]: index.html
pub trait Demangle {
    /// Infers the language of a mangled name.
    ///
    /// If the language of the symbol was specified explicitly, it is returned as-is. Otherwise,
    /// the language is guessed from the shape of the name. In case the symbol is not mangled or
    /// its language cannot be determined, the return value is `Language::Unknown`. For a list of
    /// supported languages, see the [module level documentation].
    ///
    /// # Examples
    ///
    /// ```
    /// use symbolic_common::{Language, Name};
    /// use symbolic_demangle::{Demangle, DemangleOptions};
    ///
    /// assert_eq!(Name::from("_ZN3foo3barEv").detect_language(), Language::Cpp);
    /// assert_eq!(Name::from("unknown").detect_language(), Language::Unknown);
    /// ```
    ///
    /// [module level documentation]: index.html
    fn detect_language(&self) -> Language;

    /// Demangles the name with the given options.
    ///
    /// Returns `None` in one of the following cases:
    ///  1. The language cannot be detected.
    ///  2. The language is not supported.
    ///  3. Demangling of the name failed.
    ///
    /// The implementation for `Name` returns the name unchanged if it is marked as unmangled or
    /// if it has the shape of an MD5 mangled name (`??@`, 32 hex digits, `@`).
    ///
    /// # Examples
    ///
    /// ```
    /// # #[cfg(feature = "cpp")] {
    /// use symbolic_common::Name;
    /// use symbolic_demangle::{Demangle, DemangleOptions};
    ///
    /// assert_eq!(
    ///     Name::from("_ZN3foo3barEv").demangle(DemangleOptions::name_only()),
    ///     Some("foo::bar".to_string())
    /// );
    /// assert_eq!(
    ///     Name::from("unknown").demangle(DemangleOptions::name_only()),
    ///     None
    /// );
    /// # }
    /// ```
    fn demangle(&self, opts: DemangleOptions) -> Option<String>;

    /// Tries to demangle the name and falls back to the original name.
    ///
    /// Similar to [`demangle`], except that it returns a borrowed instance of the original name if
    /// the name cannot be demangled.
    ///
    /// # Examples
    ///
    /// ```
    /// # #[cfg(feature = "cpp")] {
    /// use symbolic_common::Name;
    /// use symbolic_demangle::{Demangle, DemangleOptions};
    ///
    /// assert_eq!(
    ///     Name::from("_ZN3foo3barEv").try_demangle(DemangleOptions::name_only()),
    ///     "foo::bar"
    /// );
    /// assert_eq!(
    ///     Name::from("unknown").try_demangle(DemangleOptions::name_only()),
    ///     "unknown"
    /// );
    /// # }
    /// ```
    ///
    /// [`demangle`]: trait.Demangle.html#tymethod.demangle
    fn try_demangle(&self, opts: DemangleOptions) -> Cow<'_, str>;
}
418
419impl Demangle for Name<'_> {
420    fn detect_language(&self) -> Language {
421        if self.language() != Language::Unknown {
422            return self.language();
423        }
424
425        if is_maybe_objc(self.as_str()) {
426            return Language::ObjC;
427        }
428
429        #[cfg(feature = "rust")]
430        {
431            if rustc_demangle::try_demangle(self.as_str()).is_ok() {
432                return Language::Rust;
433            }
434        }
435
436        if is_maybe_cpp(self.as_str()) || is_maybe_msvc(self.as_str()) {
437            return Language::Cpp;
438        }
439
440        if is_maybe_swift(self.as_str()) {
441            return Language::Swift;
442        }
443
444        Language::Unknown
445    }
446
447    fn demangle(&self, opts: DemangleOptions) -> Option<String> {
448        if matches!(self.mangling(), NameMangling::Unmangled) || is_maybe_md5(self.as_str()) {
449            return Some(self.to_string());
450        }
451
452        match self.detect_language() {
453            Language::ObjC => Some(demangle_objc(self.as_str(), opts)),
454            Language::ObjCpp => try_demangle_objcpp(self.as_str(), opts),
455            Language::Rust => try_demangle_rust(self.as_str(), opts),
456            Language::Cpp => try_demangle_cpp(self.as_str(), opts),
457            Language::Swift => try_demangle_swift(self.as_str(), opts),
458            _ => None,
459        }
460    }
461
462    fn try_demangle(&self, opts: DemangleOptions) -> Cow<'_, str> {
463        if matches!(self.mangling(), NameMangling::Unmangled) {
464            return Cow::Borrowed(self.as_str());
465        }
466        match self.demangle(opts) {
467            Some(demangled) => Cow::Owned(demangled),
468            None => Cow::Borrowed(self.as_str()),
469        }
470    }
471}
472
473/// Demangles an identifier and falls back to the original symbol.
474///
475/// This is a shortcut for [`Demangle::try_demangle`] with complete demangling.
476///
477/// # Examples
478///
479/// ```
480/// # #[cfg(feature = "cpp")] {
481/// assert_eq!(symbolic_demangle::demangle("_ZN3foo3barEv"), "foo::bar()");
482/// # }
483/// ```
484///
485/// [`Demangle::try_demangle`]: trait.Demangle.html#tymethod.try_demangle
486pub fn demangle(ident: &str) -> Cow<'_, str> {
487    match Name::from(ident).demangle(DemangleOptions::complete()) {
488        Some(demangled) => Cow::Owned(demangled),
489        None => Cow::Borrowed(ident),
490    }
491}
492
#[cfg(test)]
mod test {
    use super::*;

    /// MD5 mangled names are passed through unchanged.
    #[test]
    fn simple_md5() {
        let md5_mangled = "??@8ba8d245c9eca390356129098dbe9f73@";
        let demangled = Name::from(md5_mangled).demangle(DemangleOptions::name_only());

        assert_eq!(demangled.as_deref(), Some(md5_mangled));
    }

    /// Only a `$` followed by exactly 32 hex digits at the very end is stripped.
    #[test]
    fn test_strip_hash_suffix() {
        // Pairs of (input, expected output).
        let cases = [
            // A valid suffix is removed.
            ("hello$0123456789abcdef0123456789abcdef", "hello"),
            // A marker other than `$` leaves the input untouched.
            (
                "hello_0123456789abcdef0123456789abcdef",
                "hello_0123456789abcdef0123456789abcdef",
            ),
            // A multibyte character in place of the marker.
            (
                "hello\u{1000}0123456789abcdef0123456789abcdef",
                "hello\u{1000}0123456789abcdef0123456789abcdef",
            ),
            // Non-hex characters inside the digest.
            (
                "hello$0123456789abcdef0123456789abcdxx",
                "hello$0123456789abcdef0123456789abcdxx",
            ),
            // A multibyte character inside the would-be digest.
            (
                "hello$\u{1000}0123456789abcdef0123456789abcde",
                "hello$\u{1000}0123456789abcdef0123456789abcde",
            ),
        ];

        for &(input, expected) in &cases {
            assert_eq!(strip_hash_suffix(input), expected, "input: {input:?}");
        }
    }
}