quick_xml/de/resolver.rs
1//! Entity resolver module
2
3use std::convert::Infallible;
4use std::error::Error;
5
6use crate::escape::resolve_predefined_entity;
7use crate::events::BytesText;
8
9/// Used to resolve unknown entities while parsing
10///
11/// # Example
12///
13/// ```
14/// # use serde::Deserialize;
15/// # use pretty_assertions::assert_eq;
16/// use regex::bytes::Regex;
17/// use std::collections::BTreeMap;
18/// use std::string::FromUtf8Error;
19/// use quick_xml::de::{Deserializer, EntityResolver};
20/// use quick_xml::events::BytesText;
21///
22/// struct DocTypeEntityResolver {
23/// re: Regex,
24/// map: BTreeMap<String, String>,
25/// }
26///
27/// impl Default for DocTypeEntityResolver {
28/// fn default() -> Self {
29/// Self {
30/// // We do not focus on true parsing in this example
31/// // You should use special libraries to parse DTD
32/// re: Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#).unwrap(),
33/// map: BTreeMap::new(),
34/// }
35/// }
36/// }
37///
38/// impl EntityResolver for DocTypeEntityResolver {
39/// type Error = FromUtf8Error;
40///
41/// fn capture(&mut self, doctype: BytesText) -> Result<(), Self::Error> {
42/// for cap in self.re.captures_iter(&doctype) {
43/// self.map.insert(
44/// String::from_utf8(cap[1].to_vec())?,
45/// String::from_utf8(cap[2].to_vec())?,
46/// );
47/// }
48/// Ok(())
49/// }
50///
51/// fn resolve(&self, entity: &str) -> Option<&str> {
52/// self.map.get(entity).map(|s| s.as_str())
53/// }
54/// }
55///
56/// let xml_reader = br#"
57/// <!DOCTYPE dict[ <!ENTITY e1 "entity 1"> ]>
58/// <root>
59/// <entity_one>&e1;</entity_one>
60/// </root>
61/// "#.as_ref();
62///
63/// let mut de = Deserializer::with_resolver(
64/// xml_reader,
65/// DocTypeEntityResolver::default(),
66/// );
67/// let data: BTreeMap<String, String> = BTreeMap::deserialize(&mut de).unwrap();
68///
69/// assert_eq!(data.get("entity_one"), Some(&"entity 1".to_string()));
70/// ```
71pub trait EntityResolver {
72 /// The error type that represents DTD parse error
73 type Error: Error;
74
75 /// Called on contents of [`Event::DocType`] to capture declared entities.
76 /// Can be called multiple times, for each parsed `<!DOCTYPE >` declaration.
77 ///
78 /// [`Event::DocType`]: crate::events::Event::DocType
79 fn capture(&mut self, doctype: BytesText) -> Result<(), Self::Error>;
80
81 /// Called when an entity needs to be resolved.
82 ///
83 /// `None` is returned if a suitable value can not be found.
84 /// In that case an [`EscapeError::UnrecognizedEntity`] will be returned by
85 /// a deserializer.
86 ///
87 /// [`EscapeError::UnrecognizedEntity`]: crate::escape::EscapeError::UnrecognizedEntity
88 fn resolve(&self, entity: &str) -> Option<&str>;
89}
90
/// An [`EntityResolver`] that resolves only predefined entities:
///
/// | Entity | Resolution
/// |--------|------------
/// |`&lt;`  | `<`
/// |`&gt;`  | `>`
/// |`&amp;` | `&`
/// |`&apos;`| `'`
/// |`&quot;`| `"`
///
/// The resolver is a stateless unit type, so it can be freely copied and
/// created with [`Default::default`].
// `Debug` is derived so that types embedding this resolver can derive it too.
#[derive(Debug, Default, Copy, Clone)]
pub struct PredefinedEntityResolver;
102
103impl EntityResolver for PredefinedEntityResolver {
104 type Error = Infallible;
105
106 #[inline]
107 fn capture(&mut self, _doctype: BytesText) -> Result<(), Self::Error> {
108 Ok(())
109 }
110
111 #[inline]
112 fn resolve(&self, entity: &str) -> Option<&str> {
113 resolve_predefined_entity(entity)
114 }
115}