1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
//! Entity resolver module

use std::convert::Infallible;
use std::error::Error;

use crate::escape::resolve_predefined_entity;
use crate::events::BytesText;

/// Used to resolve unknown entities while parsing
///
/// # Example
///
/// ```
/// # use serde::Deserialize;
/// # use pretty_assertions::assert_eq;
/// use regex::bytes::Regex;
/// use std::collections::BTreeMap;
/// use std::string::FromUtf8Error;
/// use quick_xml::de::{Deserializer, EntityResolver};
/// use quick_xml::events::BytesText;
///
/// struct DocTypeEntityResolver {
///     re: Regex,
///     map: BTreeMap<String, String>,
/// }
///
/// impl Default for DocTypeEntityResolver {
///     fn default() -> Self {
///         Self {
///             // We do not focus on true parsing in this example
///             // You should use special libraries to parse DTD
///             re: Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#).unwrap(),
///             map: BTreeMap::new(),
///         }
///     }
/// }
///
/// impl EntityResolver for DocTypeEntityResolver {
///     type Error = FromUtf8Error;
///
///     fn capture(&mut self, doctype: BytesText) -> Result<(), Self::Error> {
///         for cap in self.re.captures_iter(&doctype) {
///             self.map.insert(
///                 String::from_utf8(cap[1].to_vec())?,
///                 String::from_utf8(cap[2].to_vec())?,
///             );
///         }
///         Ok(())
///     }
///
///     fn resolve(&self, entity: &str) -> Option<&str> {
///         self.map.get(entity).map(|s| s.as_str())
///     }
/// }
///
/// let xml_reader = br#"
///     <!DOCTYPE dict[ <!ENTITY e1 "entity 1"> ]>
///     <root>
///         <entity_one>&e1;</entity_one>
///     </root>
/// "#.as_ref();
///
/// let mut de = Deserializer::with_resolver(
///     xml_reader,
///     DocTypeEntityResolver::default(),
/// );
/// let data: BTreeMap<String, String> = BTreeMap::deserialize(&mut de).unwrap();
///
/// assert_eq!(data.get("entity_one"), Some(&"entity 1".to_string()));
/// ```
pub trait EntityResolver {
    /// The error type that represents DTD parse error.
    ///
    /// This is the error produced by [`capture`](Self::capture). It is required
    /// to implement [`std::error::Error`].
    type Error: Error;

    /// Called on contents of [`Event::DocType`] to capture declared entities.
    /// Can be called multiple times, for each parsed `<!DOCTYPE >` declaration.
    ///
    /// The method takes `&mut self`, so an implementation is able to store
    /// whatever it extracts from `doctype` and use it later in
    /// [`resolve`](Self::resolve).
    ///
    /// # Errors
    ///
    /// Returns [`Self::Error`] if the implementation is unable to process
    /// the content of `doctype`.
    ///
    /// [`Event::DocType`]: crate::events::Event::DocType
    fn capture(&mut self, doctype: BytesText) -> Result<(), Self::Error>;

    /// Called when an entity needs to be resolved.
    ///
    /// `None` is returned if a suitable value can not be found.
    /// In that case an [`EscapeError::UnrecognizedEntity`] will be returned by
    /// a deserializer.
    ///
    /// The returned string slice borrows from the resolver itself (`&self`),
    /// not from the `entity` argument.
    ///
    /// NOTE(review): judging by the example in the trait documentation, `entity`
    /// is the bare entity name (`e1` for `&e1;`), without the leading `&` and
    /// the trailing `;` -- confirm against the caller.
    ///
    /// [`EscapeError::UnrecognizedEntity`]: crate::escape::EscapeError::UnrecognizedEntity
    fn resolve(&self, entity: &str) -> Option<&str>;
}

/// An [`EntityResolver`] that resolves only predefined entities:
///
/// | Entity | Resolution
/// |--------|------------
/// |`&lt;`  | `<`
/// |`&gt;`  | `>`
/// |`&amp;` | `&`
/// |`&apos;`| `'`
/// |`&quot;`| `"`
///
/// This is a unit struct without any state: content of `<!DOCTYPE >`
/// declarations passed to it is ignored, so entities declared in a DTD are
/// never recognized by this resolver.
///
/// `Debug` is derived so the resolver can be printed with `{:?}` and so that
/// types which embed it are able to derive `Debug` themselves.
#[derive(Debug, Default, Copy, Clone)]
pub struct PredefinedEntityResolver;

/// Stateless resolver: DTD content is discarded and every lookup is forwarded
/// to `resolve_predefined_entity`.
impl EntityResolver for PredefinedEntityResolver {
    /// Nothing is parsed in `capture`, so it has no way to fail.
    type Error = Infallible;

    /// Accepts the DOCTYPE content and drops it without recording anything.
    #[inline]
    fn capture(&mut self, _dtd: BytesText) -> Result<(), Infallible> {
        Ok(())
    }

    /// Returns whatever `resolve_predefined_entity` yields for `name`.
    #[inline]
    fn resolve(&self, name: &str) -> Option<&str> {
        resolve_predefined_entity(name)
    }
}