// quick_xml/de/resolver.rs

//! Entity resolver module

use std::convert::Infallible;
use std::error::Error;

use crate::escape::resolve_predefined_entity;
use crate::events::BytesText;

9/// Used to resolve unknown entities while parsing
10///
11/// # Example
12///
13/// ```
14/// # use serde::Deserialize;
15/// # use pretty_assertions::assert_eq;
16/// use regex::bytes::Regex;
17/// use std::collections::BTreeMap;
18/// use std::string::FromUtf8Error;
19/// use quick_xml::de::{Deserializer, EntityResolver};
20/// use quick_xml::events::BytesText;
21///
22/// struct DocTypeEntityResolver {
23///     re: Regex,
24///     map: BTreeMap<String, String>,
25/// }
26///
27/// impl Default for DocTypeEntityResolver {
28///     fn default() -> Self {
29///         Self {
30///             // We do not focus on true parsing in this example
31///             // You should use special libraries to parse DTD
32///             re: Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#).unwrap(),
33///             map: BTreeMap::new(),
34///         }
35///     }
36/// }
37///
38/// impl EntityResolver for DocTypeEntityResolver {
39///     type Error = FromUtf8Error;
40///
41///     fn capture(&mut self, doctype: BytesText) -> Result<(), Self::Error> {
42///         for cap in self.re.captures_iter(&doctype) {
43///             self.map.insert(
44///                 String::from_utf8(cap[1].to_vec())?,
45///                 String::from_utf8(cap[2].to_vec())?,
46///             );
47///         }
48///         Ok(())
49///     }
50///
51///     fn resolve(&self, entity: &str) -> Option<&str> {
52///         self.map.get(entity).map(|s| s.as_str())
53///     }
54/// }
55///
56/// let xml_reader = br#"
57///     <!DOCTYPE dict[ <!ENTITY e1 "entity 1"> ]>
58///     <root>
59///         <entity_one>&e1;</entity_one>
60///     </root>
61/// "#.as_ref();
62///
63/// let mut de = Deserializer::with_resolver(
64///     xml_reader,
65///     DocTypeEntityResolver::default(),
66/// );
67/// let data: BTreeMap<String, String> = BTreeMap::deserialize(&mut de).unwrap();
68///
69/// assert_eq!(data.get("entity_one"), Some(&"entity 1".to_string()));
70/// ```
71pub trait EntityResolver {
72    /// The error type that represents DTD parse error
73    type Error: Error;
74
75    /// Called on contents of [`Event::DocType`] to capture declared entities.
76    /// Can be called multiple times, for each parsed `<!DOCTYPE >` declaration.
77    ///
78    /// [`Event::DocType`]: crate::events::Event::DocType
79    fn capture(&mut self, doctype: BytesText) -> Result<(), Self::Error>;
80
81    /// Called when an entity needs to be resolved.
82    ///
83    /// `None` is returned if a suitable value can not be found.
84    /// In that case an [`EscapeError::UnrecognizedEntity`] will be returned by
85    /// a deserializer.
86    ///
87    /// [`EscapeError::UnrecognizedEntity`]: crate::escape::EscapeError::UnrecognizedEntity
88    fn resolve(&self, entity: &str) -> Option<&str>;
89}
90
/// An [`EntityResolver`] that resolves only predefined entities:
///
/// | Entity | Resolution
/// |--------|------------
/// |`&lt;`  | `<`
/// |`&gt;`  | `>`
/// |`&amp;` | `&`
/// |`&apos;`| `'`
/// |`&quot;`| `"`
///
/// The type carries no state, so it is cheap to copy, and all of its values
/// compare equal.
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)]
pub struct PredefinedEntityResolver;

103impl EntityResolver for PredefinedEntityResolver {
104    type Error = Infallible;
105
106    #[inline]
107    fn capture(&mut self, _doctype: BytesText) -> Result<(), Self::Error> {
108        Ok(())
109    }
110
111    #[inline]
112    fn resolve(&self, entity: &str) -> Option<&str> {
113        resolve_predefined_entity(entity)
114    }
115}