//! Entity resolver module
use std::convert::Infallible;
use std::error::Error;
use crate::events::BytesText;
/// Used to resolve unknown entities while parsing.
///
/// An implementation first receives the contents of `<!DOCTYPE >` declarations
/// through [`capture`](Self::capture) and is afterwards asked to map entity
/// names to their replacement text through [`resolve`](Self::resolve).
///
/// # Examples
///
/// ```
/// # use serde::Deserialize;
/// # use pretty_assertions::assert_eq;
/// use regex::bytes::Regex;
/// use std::collections::BTreeMap;
/// use std::string::FromUtf8Error;
/// use quick_xml::de::{Deserializer, EntityResolver};
/// use quick_xml::events::BytesText;
///
/// struct DocTypeEntityResolver {
///     re: Regex,
///     map: BTreeMap<String, String>,
/// }
///
/// impl Default for DocTypeEntityResolver {
///     fn default() -> Self {
///         Self {
///             // We do not focus on true parsing in this example
///             // You should use special libraries to parse DTD
///             re: Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#).unwrap(),
///             map: BTreeMap::new(),
///         }
///     }
/// }
///
/// impl EntityResolver for DocTypeEntityResolver {
///     type Error = FromUtf8Error;
///
///     fn capture(&mut self, doctype: BytesText) -> Result<(), Self::Error> {
///         for cap in self.re.captures_iter(&doctype) {
///             self.map.insert(
///                 String::from_utf8(cap[1].to_vec())?,
///                 String::from_utf8(cap[2].to_vec())?,
///             );
///         }
///         Ok(())
///     }
///
///     fn resolve(&self, entity: &str) -> Option<&str> {
///         self.map.get(entity).map(|s| s.as_str())
///     }
/// }
///
/// let xml_reader = br#"
/// <!DOCTYPE dict[ <!ENTITY e1 "entity 1"> ]>
/// <root>
/// <entity_one>&e1;</entity_one>
/// </root>
/// "#.as_ref();
///
/// let mut de = Deserializer::with_resolver(
///     xml_reader,
///     DocTypeEntityResolver::default(),
/// );
/// let data: BTreeMap<String, String> = BTreeMap::deserialize(&mut de).unwrap();
///
/// assert_eq!(data.get("entity_one"), Some(&"entity 1".to_string()));
/// ```
pub trait EntityResolver {
/// The error type that represents a DTD parse error.
///
/// It is the error returned by [`capture`](Self::capture).
type Error: Error;
/// Called on contents of [`Event::DocType`] to capture declared entities.
/// Can be called multiple times, once for each parsed `<!DOCTYPE >` declaration.
///
/// # Errors
///
/// Returns [`Self::Error`] when the implementation fails to process the
/// supplied declaration contents.
///
/// [`Event::DocType`]: crate::events::Event::DocType
fn capture(&mut self, doctype: BytesText) -> Result<(), Self::Error>;
/// Called when an entity needs to be resolved.
///
/// Returns the replacement text for `entity`, borrowed from the resolver.
///
/// `None` is returned if a suitable value cannot be found.
/// In that case an [`EscapeError::UnrecognizedSymbol`] will be returned by
/// a deserializer.
///
/// [`EscapeError::UnrecognizedSymbol`]: crate::escape::EscapeError::UnrecognizedSymbol
fn resolve(&self, entity: &str) -> Option<&str>;
}
/// An [`EntityResolver`] that does nothing and always returns `None`.
///
/// This is a field-less unit struct: it stores nothing from the captured
/// `<!DOCTYPE >` contents and therefore has no entity to offer on lookup.
/// `Debug` is derived so the type can appear in diagnostics and in
/// `#[derive(Debug)]` containers that embed a resolver.
#[derive(Debug, Default, Copy, Clone)]
pub struct NoEntityResolver;
impl EntityResolver for NoEntityResolver {
    /// Capturing can never fail, because the input is never inspected.
    type Error = Infallible;

    /// Discards the `<!DOCTYPE >` contents and reports success.
    fn capture(&mut self, _: BytesText) -> Result<(), Infallible> {
        Ok(())
    }

    /// Knows no entities, so every lookup yields `None`.
    fn resolve(&self, _: &str) -> Option<&str> {
        None
    }
}