tauri_utils/
html.rs

1// Copyright 2019-2024 Tauri Programme within The Commons Conservancy
2// SPDX-License-Identifier: Apache-2.0
3// SPDX-License-Identifier: MIT
4
5//! The module to process HTML in Tauri.
6
7use std::path::{Path, PathBuf};
8
9use html5ever::{
10  interface::QualName,
11  namespace_url, ns,
12  serialize::{HtmlSerializer, SerializeOpts, Serializer, TraversalScope},
13  tendril::TendrilSink,
14  LocalName,
15};
16pub use kuchiki::NodeRef;
17use kuchiki::{Attribute, ExpandedName, NodeData};
18use serde::Serialize;
19#[cfg(feature = "isolation")]
20use serialize_to_javascript::DefaultTemplate;
21
22use crate::config::{DisabledCspModificationKind, PatternKind};
23#[cfg(feature = "isolation")]
24use crate::pattern::isolation::IsolationJavascriptCodegen;
25
/// Placeholder written into the `nonce` attribute of script elements.
pub const SCRIPT_NONCE_TOKEN: &str = "__TAURI_SCRIPT_NONCE__";
/// Placeholder written into the `nonce` attribute of style elements.
pub const STYLE_NONCE_TOKEN: &str = "__TAURI_STYLE_NONCE__";
30
31// taken from <https://github.com/kuchiki-rs/kuchiki/blob/57ee6920d835315a498e748ba4b07a851ae5e498/src/serializer.rs#L12>
32fn serialize_node_ref_internal<S: Serializer>(
33  node: &NodeRef,
34  serializer: &mut S,
35  traversal_scope: TraversalScope,
36) -> crate::Result<()> {
37  match (traversal_scope, node.data()) {
38    (ref scope, NodeData::Element(element)) => {
39      if *scope == TraversalScope::IncludeNode {
40        let attrs = element.attributes.borrow();
41
42        // Unfortunately we need to allocate something to hold these &'a QualName
43        let attrs = attrs
44          .map
45          .iter()
46          .map(|(name, attr)| {
47            (
48              QualName::new(attr.prefix.clone(), name.ns.clone(), name.local.clone()),
49              &attr.value,
50            )
51          })
52          .collect::<Vec<_>>();
53
54        serializer.start_elem(
55          element.name.clone(),
56          attrs.iter().map(|&(ref name, value)| (name, &**value)),
57        )?
58      }
59
60      let children = match element.template_contents.as_ref() {
61        Some(template_root) => template_root.children(),
62        None => node.children(),
63      };
64      for child in children {
65        serialize_node_ref_internal(&child, serializer, TraversalScope::IncludeNode)?
66      }
67
68      if *scope == TraversalScope::IncludeNode {
69        serializer.end_elem(element.name.clone())?
70      }
71      Ok(())
72    }
73
74    (_, &NodeData::DocumentFragment) | (_, &NodeData::Document(_)) => {
75      for child in node.children() {
76        serialize_node_ref_internal(&child, serializer, TraversalScope::IncludeNode)?
77      }
78      Ok(())
79    }
80
81    (TraversalScope::ChildrenOnly(_), _) => Ok(()),
82
83    (TraversalScope::IncludeNode, NodeData::Doctype(doctype)) => {
84      serializer.write_doctype(&doctype.name).map_err(Into::into)
85    }
86    (TraversalScope::IncludeNode, NodeData::Text(text)) => {
87      serializer.write_text(&text.borrow()).map_err(Into::into)
88    }
89    (TraversalScope::IncludeNode, NodeData::Comment(text)) => {
90      serializer.write_comment(&text.borrow()).map_err(Into::into)
91    }
92    (TraversalScope::IncludeNode, NodeData::ProcessingInstruction(contents)) => {
93      let contents = contents.borrow();
94      serializer
95        .write_processing_instruction(&contents.0, &contents.1)
96        .map_err(Into::into)
97    }
98  }
99}
100
101/// Serializes the node to HTML.
102pub fn serialize_node(node: &NodeRef) -> Vec<u8> {
103  let mut u8_vec = Vec::new();
104  let mut ser = HtmlSerializer::new(
105    &mut u8_vec,
106    SerializeOpts {
107      traversal_scope: TraversalScope::IncludeNode,
108      ..Default::default()
109    },
110  );
111  serialize_node_ref_internal(node, &mut ser, TraversalScope::IncludeNode).unwrap();
112  u8_vec
113}
114
115/// Parses the given HTML string.
116pub fn parse(html: String) -> NodeRef {
117  kuchiki::parse_html().one(html)
118}
119
120fn with_head<F: FnOnce(&NodeRef)>(document: &NodeRef, f: F) {
121  if let Ok(ref node) = document.select_first("head") {
122    f(node.as_node())
123  } else {
124    let node = NodeRef::new_element(
125      QualName::new(None, ns!(html), LocalName::from("head")),
126      None,
127    );
128    f(&node);
129    document.prepend(node)
130  }
131}
132
133fn inject_nonce(document: &NodeRef, selector: &str, token: &str) {
134  if let Ok(elements) = document.select(selector) {
135    for target in elements {
136      let node = target.as_node();
137      let element = node.as_element().unwrap();
138
139      let mut attrs = element.attributes.borrow_mut();
140      // if the node already has the `nonce` attribute, skip it
141      if attrs.get("nonce").is_some() {
142        continue;
143      }
144      attrs.insert("nonce", token.into());
145    }
146  }
147}
148
149/// Inject nonce tokens to all scripts and styles.
150pub fn inject_nonce_token(
151  document: &NodeRef,
152  dangerous_disable_asset_csp_modification: &DisabledCspModificationKind,
153) {
154  if dangerous_disable_asset_csp_modification.can_modify("script-src") {
155    inject_nonce(document, "script[src^='http']", SCRIPT_NONCE_TOKEN);
156  }
157  if dangerous_disable_asset_csp_modification.can_modify("style-src") {
158    inject_nonce(document, "style", STYLE_NONCE_TOKEN);
159  }
160}
161
162/// Injects a content security policy to the HTML.
163pub fn inject_csp(document: &NodeRef, csp: &str) {
164  with_head(document, |head| {
165    head.append(create_csp_meta_tag(csp));
166  });
167}
168
169fn create_csp_meta_tag(csp: &str) -> NodeRef {
170  NodeRef::new_element(
171    QualName::new(None, ns!(html), LocalName::from("meta")),
172    vec![
173      (
174        ExpandedName::new(ns!(), LocalName::from("http-equiv")),
175        Attribute {
176          prefix: None,
177          value: "Content-Security-Policy".into(),
178        },
179      ),
180      (
181        ExpandedName::new(ns!(), LocalName::from("content")),
182        Attribute {
183          prefix: None,
184          value: csp.into(),
185        },
186      ),
187    ],
188  )
189}
190
191/// The shape of the JavaScript Pattern config
192#[derive(Debug, Serialize)]
193#[serde(rename_all = "lowercase", tag = "pattern")]
194pub enum PatternObject {
195  /// Brownfield pattern.
196  Brownfield,
197  /// Isolation pattern. Recommended for security purposes.
198  Isolation {
199    /// Which `IsolationSide` this `PatternObject` is getting injected into
200    side: IsolationSide,
201  },
202}
203
204impl From<&PatternKind> for PatternObject {
205  fn from(pattern_kind: &PatternKind) -> Self {
206    match pattern_kind {
207      PatternKind::Brownfield => Self::Brownfield,
208      PatternKind::Isolation { .. } => Self::Isolation {
209        side: IsolationSide::default(),
210      },
211    }
212  }
213}
214
215/// Where the JavaScript is injected to
216#[derive(Debug, Serialize)]
217#[serde(rename_all = "lowercase")]
218pub enum IsolationSide {
219  /// Original frame, the Brownfield application
220  Original,
221  /// Secure frame, the isolation security application
222  Secure,
223}
224
225impl Default for IsolationSide {
226  fn default() -> Self {
227    Self::Original
228  }
229}
230
/// Prepends the rendered Isolation JavaScript, wrapped in a `<script>` element that carries the
/// [`SCRIPT_NONCE_TOKEN`] nonce, to the `<head>` of a codegen time document.
///
/// Note: This function is not considered part of the stable API.
///
/// # Panics
///
/// Panics if the isolation script template cannot be rendered.
#[cfg(feature = "isolation")]
pub fn inject_codegen_isolation_script(document: &NodeRef) {
  let source = IsolationJavascriptCodegen {}
    .render_default(&Default::default())
    .expect("unable to render codegen isolation script template")
    .into_string();

  let nonce = (
    ExpandedName::new(ns!(), LocalName::from("nonce")),
    Attribute {
      prefix: None,
      value: SCRIPT_NONCE_TOKEN.into(),
    },
  );
  let script = NodeRef::new_element(
    QualName::new(None, ns!(html), "script".into()),
    vec![nonce],
  );
  script.append(NodeRef::new_text(source));

  with_head(document, move |head| head.prepend(script));
}
257
258/// Temporary workaround for Windows not allowing requests
259///
260/// Note: this does not prevent path traversal due to the isolation application expectation that it
261/// is secure.
262pub fn inline_isolation(document: &NodeRef, dir: &Path) {
263  for script in document
264    .select("script[src]")
265    .expect("unable to parse document for scripts")
266  {
267    let src = {
268      let attributes = script.attributes.borrow();
269      attributes
270        .get(LocalName::from("src"))
271        .expect("script with src attribute has no src value")
272        .to_string()
273    };
274
275    let mut path = PathBuf::from(src);
276    if path.has_root() {
277      path = path
278        .strip_prefix("/")
279        .expect("Tauri \"Isolation\" Pattern only supports relative or absolute (`/`) paths.")
280        .into();
281    }
282
283    let file = std::fs::read_to_string(dir.join(path)).expect("unable to find isolation file");
284    script.as_node().append(NodeRef::new_text(file));
285
286    let mut attributes = script.attributes.borrow_mut();
287    attributes.remove(LocalName::from("src"));
288  }
289}
290
#[cfg(test)]
mod tests {
  use kuchiki::traits::*;

  /// The CSP meta tag ends up in `<head>` whether or not the input declares one.
  #[test]
  fn csp() {
    let csp = "csp-string";
    let expected = format!(
      r#"<html><head><meta http-equiv="Content-Security-Policy" content="{csp}"></head><body></body></html>"#,
    );

    for html in ["<html><head></head></html>", "<html></html>"] {
      let document = kuchiki::parse_html().one(String::from(html));
      super::inject_csp(&document, csp);
      assert_eq!(document.to_string(), expected);
    }
  }
}