xml/
escape.rs

//! Contains functions for escaping XML special characters.
2
3use std::borrow::Cow;
4use std::fmt::{Display, Formatter, Result};
5use std::marker::PhantomData;
6
/// A byte-level escape table: maps individual bytes to the text that replaces them.
pub(crate) trait Escapes {
    /// Returns the replacement for `c`, or `None` when `c` has no replacement.
    fn escape(c: u8) -> Option<&'static str>;

    /// Reports whether `c` has a replacement, i.e. whether `escape(c)` is `Some`.
    fn byte_needs_escaping(c: u8) -> bool {
        Self::escape(c).is_some()
    }

    /// Reports whether at least one byte of `s` has a replacement.
    fn str_needs_escaping(s: &str) -> bool {
        // Stops at the first byte for which `escape` yields a replacement.
        s.bytes().find_map(Self::escape).is_some()
    }
}
18
19pub(crate) struct Escaped<'a, E: Escapes> {
20    _escape_phantom: PhantomData<E>,
21    to_escape: &'a str,
22}
23
24impl<'a, E: Escapes> Escaped<'a, E> {
25    pub const fn new(s: &'a str) -> Self {
26        Escaped {
27            _escape_phantom: PhantomData,
28            to_escape: s,
29        }
30    }
31}
32
33impl<E: Escapes> Display for Escaped<'_, E> {
34    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
35        let mut total_remaining = self.to_escape;
36
37        // find the next occurence
38        while let Some(n) = total_remaining.bytes().position(E::byte_needs_escaping) {
39            let (start, remaining) = total_remaining.split_at(n);
40
41            f.write_str(start)?;
42
43            // unwrap is safe because we checked is_some for position n earlier
44            let next_byte = remaining.bytes().next().unwrap();
45            let replacement = E::escape(next_byte).unwrap_or("unexpected token");
46            f.write_str(replacement)?;
47
48            total_remaining = &remaining[1..];
49        }
50
51        f.write_str(total_remaining)
52    }
53}
54
55fn escape_str<E: Escapes>(s: &str) -> Cow<'_, str> {
56    if E::str_needs_escaping(s) {
57        Cow::Owned(Escaped::<E>::new(s).to_string())
58    } else {
59        Cow::Borrowed(s)
60    }
61}
62
// Declares a unit struct named `$table` and implements `Escapes` for it from a
// list of `byte => replacement` pairs. Bytes that are not listed map to `None`.
macro_rules! escapes {
    (
        $table:ident,
        $($byte:expr => $entity:expr),* $(,)?
    ) => {
        pub(crate) struct $table;

        impl Escapes for $table {
            fn escape(c: u8) -> Option<&'static str> {
                match c {
                    $($byte => Some($entity),)*
                    _ => None,
                }
            }
        }
    };
}
80
81escapes!(
82    AttributeEscapes,
83    b'<'  => "&lt;",
84    b'>'  => "&gt;",
85    b'"'  => "&quot;",
86    b'\'' => "&apos;",
87    b'&'  => "&amp;",
88    b'\n' => "&#xA;",
89    b'\r' => "&#xD;",
90);
91
92escapes!(
93    PcDataEscapes,
94    b'<' => "&lt;",
95    b'>' => "&gt;",
96    b'&' => "&amp;",
97);
98
99/// Performs escaping of common XML characters inside an attribute value.
100///
101/// This function replaces several important markup characters with their
102/// entity equivalents:
103///
104/// * `<` → `&lt;`
105/// * `>` → `&gt;`
106/// * `"` → `&quot;`
107/// * `'` → `&apos;`
108/// * `&` → `&amp;`
109///
110/// The following characters are escaped so that attributes are printed on
111/// a single line:
112/// * `\n` → `&#xA;`
113/// * `\r` → `&#xD;`
114///
115/// The resulting string is safe to use inside XML attribute values or in PCDATA sections.
116///
117/// Does not perform allocations if the given string does not contain escapable characters.
118#[inline]
119#[must_use]
120pub fn escape_str_attribute(s: &str) -> Cow<'_, str> {
121    escape_str::<AttributeEscapes>(s)
122}
123
124/// Performs escaping of common XML characters inside PCDATA.
125///
126/// This function replaces several important markup characters with their
127/// entity equivalents:
128///
129/// * `<` → `&lt;`
130/// * `&` → `&amp;`
131///
132/// The resulting string is safe to use inside PCDATA sections but NOT inside attribute values.
133///
134/// Does not perform allocations if the given string does not contain escapable characters.
135#[inline]
136#[must_use]
137pub fn escape_str_pcdata(s: &str) -> Cow<'_, str> {
138    escape_str::<PcDataEscapes>(s)
139}
140
#[cfg(test)]
mod tests {
    use super::{escape_str_attribute, escape_str_pcdata};

    /// Every attribute escape is applied; clean input comes back unchanged.
    #[test]
    fn test_escape_str_attribute() {
        let cases = [
            ("<>'\"&\n\r", "&lt;&gt;&apos;&quot;&amp;&#xA;&#xD;"),
            ("no_escapes", "no_escapes"),
        ];

        for &(input, expected) in &cases {
            assert_eq!(escape_str_attribute(input), expected);
        }
    }

    /// Every PCDATA escape is applied; clean input comes back unchanged.
    #[test]
    fn test_escape_str_pcdata() {
        let cases = [
            ("<>&", "&lt;&gt;&amp;"),
            ("no_escapes", "no_escapes"),
        ];

        for &(input, expected) in &cases {
            assert_eq!(escape_str_pcdata(input), expected);
        }
    }

    /// A multi-byte code point ahead of an escaped byte is passed through intact.
    #[test]
    fn test_escape_multibyte_code_points() {
        let input = "☃<";
        let expected = "☃&lt;";

        assert_eq!(escape_str_attribute(input), expected);
        assert_eq!(escape_str_pcdata(input), expected);
    }
}