tauri_utils/
html.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
// Copyright 2019-2024 Tauri Programme within The Commons Conservancy
// SPDX-License-Identifier: Apache-2.0
// SPDX-License-Identifier: MIT

//! The module to process HTML in Tauri.

use std::path::{Path, PathBuf};

use html5ever::{
  interface::QualName,
  namespace_url, ns,
  serialize::{HtmlSerializer, SerializeOpts, Serializer, TraversalScope},
  tendril::TendrilSink,
  LocalName,
};
pub use kuchiki::NodeRef;
use kuchiki::{Attribute, ExpandedName, NodeData};
use serde::Serialize;
#[cfg(feature = "isolation")]
use serialize_to_javascript::DefaultTemplate;

use crate::config::{DisabledCspModificationKind, PatternKind};
#[cfg(feature = "isolation")]
use crate::pattern::isolation::IsolationJavascriptCodegen;

/// The token used for script nonces.
///
/// Within this module it is written verbatim as the value of the `nonce` attribute on script
/// elements. NOTE(review): presumably it is replaced by a real nonce elsewhere; that
/// substitution is not part of this module.
pub const SCRIPT_NONCE_TOKEN: &str = "__TAURI_SCRIPT_NONCE__";
/// The token used for style nonces.
///
/// Within this module it is written verbatim as the value of the `nonce` attribute on `<style>`
/// elements. NOTE(review): presumably it is replaced by a real nonce elsewhere; that
/// substitution is not part of this module.
pub const STYLE_NONCE_TOKEN: &str = "__TAURI_STYLE_NONCE__";

// Adapted from kuchiki's serializer:
// <https://github.com/kuchiki-rs/kuchiki/blob/57ee6920d835315a498e748ba4b07a851ae5e498/src/serializer.rs#L12>
//
// Recursively feeds `node` into `serializer`.
//
// `traversal_scope` only affects `node` itself: with `TraversalScope::IncludeNode` the node is
// emitted together with its descendants, otherwise only the descendants of an element are
// emitted and any non-container node produces nothing. Descendants are always serialized with
// `TraversalScope::IncludeNode`. Document and document-fragment nodes have no markup of their
// own, so only their children are written regardless of the scope.
fn serialize_node_ref_internal<S: Serializer>(
  node: &NodeRef,
  serializer: &mut S,
  traversal_scope: TraversalScope,
) -> crate::Result<()> {
  let include_self = traversal_scope == TraversalScope::IncludeNode;

  match node.data() {
    NodeData::Element(element) => {
      if include_self {
        let borrowed = element.attributes.borrow();

        // The serializer expects `&QualName`s, so owned names have to be materialized first.
        let named_values: Vec<_> = borrowed
          .map
          .iter()
          .map(|(name, attr)| {
            let qualified =
              QualName::new(attr.prefix.clone(), name.ns.clone(), name.local.clone());
            (qualified, &attr.value)
          })
          .collect();

        serializer.start_elem(
          element.name.clone(),
          named_values
            .iter()
            .map(|(name, value)| (name, value.as_str())),
        )?;
      }

      // A `<template>` keeps its content in a separate root rather than as direct children.
      let mut children = element
        .template_contents
        .as_ref()
        .map_or_else(|| node.children(), |template_root| template_root.children());
      children.try_for_each(|child| {
        serialize_node_ref_internal(&child, serializer, TraversalScope::IncludeNode)
      })?;

      if include_self {
        serializer.end_elem(element.name.clone())?;
      }
      Ok(())
    }

    NodeData::DocumentFragment | NodeData::Document(_) => node.children().try_for_each(|child| {
      serialize_node_ref_internal(&child, serializer, TraversalScope::IncludeNode)
    }),

    // Every remaining node kind is a leaf: without `IncludeNode` there is nothing to write.
    _ if !include_self => Ok(()),

    NodeData::Doctype(doctype) => {
      serializer.write_doctype(&doctype.name)?;
      Ok(())
    }
    NodeData::Text(text) => {
      serializer.write_text(&text.borrow())?;
      Ok(())
    }
    NodeData::Comment(text) => {
      serializer.write_comment(&text.borrow())?;
      Ok(())
    }
    NodeData::ProcessingInstruction(contents) => {
      let contents = contents.borrow();
      serializer.write_processing_instruction(&contents.0, &contents.1)?;
      Ok(())
    }
  }
}

/// Serializes the node, including the node itself and all of its descendants, to HTML bytes.
///
/// # Panics
///
/// Panics if the underlying serializer reports an error.
pub fn serialize_node(node: &NodeRef) -> Vec<u8> {
  let mut output = Vec::new();
  let options = SerializeOpts {
    traversal_scope: TraversalScope::IncludeNode,
    ..Default::default()
  };
  let mut serializer = HtmlSerializer::new(&mut output, options);
  serialize_node_ref_internal(node, &mut serializer, TraversalScope::IncludeNode).unwrap();
  output
}

/// Parses the given HTML string into a document tree and returns its root node.
pub fn parse(html: String) -> NodeRef {
  let parser = kuchiki::parse_html();
  parser.one(html)
}

fn with_head<F: FnOnce(&NodeRef)>(document: &NodeRef, f: F) {
  if let Ok(ref node) = document.select_first("head") {
    f(node.as_node())
  } else {
    let node = NodeRef::new_element(
      QualName::new(None, ns!(html), LocalName::from("head")),
      None,
    );
    f(&node);
    document.prepend(node)
  }
}

// Sets `nonce="<token>"` on every element of `document` matching `selector`.
//
// Elements that already carry a `nonce` attribute keep their existing value. If the selector
// cannot be compiled, nothing is changed.
fn inject_nonce(document: &NodeRef, selector: &str, token: &str) {
  let matches = match document.select(selector) {
    Ok(matches) => matches,
    Err(_) => return,
  };

  for element in matches {
    let mut attributes = element.attributes.borrow_mut();
    // Never overwrite a nonce that is already present on the element.
    if !attributes.contains("nonce") {
      attributes.insert("nonce", token.into());
    }
  }
}

/// Injects nonce tokens into the scripts and styles of the document.
///
/// [`SCRIPT_NONCE_TOKEN`] is added to elements matching `script[src^='http']` when the
/// `script-src` directive may be modified, and [`STYLE_NONCE_TOKEN`] is added to `style`
/// elements when the `style-src` directive may be modified. Elements that already have a
/// `nonce` attribute are left untouched.
pub fn inject_nonce_token(
  document: &NodeRef,
  dangerous_disable_asset_csp_modification: &DisabledCspModificationKind,
) {
  // (CSP directive, element selector, nonce placeholder)
  let targets = [
    ("script-src", "script[src^='http']", SCRIPT_NONCE_TOKEN),
    ("style-src", "style", STYLE_NONCE_TOKEN),
  ];

  for &(directive, selector, token) in &targets {
    if dangerous_disable_asset_csp_modification.can_modify(directive) {
      inject_nonce(document, selector, token);
    }
  }
}

/// Injects a content security policy into the HTML.
///
/// The policy is appended to the document's `<head>` as a
/// `<meta http-equiv="Content-Security-Policy">` tag.
pub fn inject_csp(document: &NodeRef, csp: &str) {
  let append_policy = |head: &NodeRef| head.append(create_csp_meta_tag(csp));
  with_head(document, append_policy);
}

// Builds a detached `<meta http-equiv="Content-Security-Policy" content="{csp}">` element.
fn create_csp_meta_tag(csp: &str) -> NodeRef {
  // Both attributes are un-prefixed and live in the empty namespace.
  let attribute = |name: &str, value: &str| {
    (
      ExpandedName::new(ns!(), LocalName::from(name)),
      Attribute {
        prefix: None,
        value: value.into(),
      },
    )
  };

  let attributes = vec![
    attribute("http-equiv", "Content-Security-Policy"),
    attribute("content", csp),
  ];
  let meta = QualName::new(None, ns!(html), LocalName::from("meta"));
  NodeRef::new_element(meta, attributes)
}

/// The shape of the JavaScript Pattern config.
///
/// Serialized by serde as an internally tagged object: the lowercased variant name is stored
/// under the `pattern` key, next to the variant's own fields.
#[derive(Debug, Serialize)]
#[serde(rename_all = "lowercase", tag = "pattern")]
pub enum PatternObject {
  /// Brownfield pattern.
  Brownfield,
  /// Isolation pattern. Recommended for security purposes.
  Isolation {
    /// Which `IsolationSide` this `PatternObject` is getting injected into
    side: IsolationSide,
  },
}

impl From<&PatternKind> for PatternObject {
  fn from(pattern_kind: &PatternKind) -> Self {
    match pattern_kind {
      PatternKind::Brownfield => Self::Brownfield,
      PatternKind::Isolation { .. } => Self::Isolation {
        side: IsolationSide::default(),
      },
    }
  }
}

/// Where the JavaScript is injected to.
///
/// Serialized by serde as the lowercased variant name.
#[derive(Debug, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum IsolationSide {
  /// Original frame, the Brownfield application
  Original,
  /// Secure frame, the isolation security application
  Secure,
}

impl Default for IsolationSide {
  fn default() -> Self {
    Self::Original
  }
}

/// Injects the Isolation JavaScript to a codegen time document.
///
/// The rendered script is placed in a `<script>` element carrying [`SCRIPT_NONCE_TOKEN`] as its
/// `nonce`, and that element becomes the first child of the document's `<head>`.
///
/// Note: This function is not considered part of the stable API.
///
/// # Panics
///
/// Panics if the isolation script template cannot be rendered.
#[cfg(feature = "isolation")]
pub fn inject_codegen_isolation_script(document: &NodeRef) {
  with_head(document, |head| {
    let nonce_attribute = (
      ExpandedName::new(ns!(), LocalName::from("nonce")),
      Attribute {
        prefix: None,
        value: SCRIPT_NONCE_TOKEN.into(),
      },
    );
    let script = NodeRef::new_element(
      QualName::new(None, ns!(html), LocalName::from("script")),
      vec![nonce_attribute],
    );

    let source = IsolationJavascriptCodegen {}
      .render_default(&Default::default())
      .expect("unable to render codegen isolation script template")
      .into_string();
    script.append(NodeRef::new_text(source));

    // The isolation script goes first so it precedes everything else in `<head>`.
    head.prepend(script);
  });
}

/// Temporary workaround for Windows not allowing requests
///
/// Every `<script src="...">` in the document is turned into an inline script: the referenced
/// file is read from `dir`, its contents are appended to the element as text, and the `src`
/// attribute is removed. A `src` with a leading `/` is resolved relative to `dir`.
///
/// Note: this does not prevent path traversal due to the isolation application expectation that it
/// is secure.
///
/// # Panics
///
/// Panics if the script selector cannot be compiled, if a rooted `src` does not start with `/`,
/// or if the referenced file cannot be read as a string.
pub fn inline_isolation(document: &NodeRef, dir: &Path) {
  let scripts = document
    .select("script[src]")
    .expect("unable to parse document for scripts");

  for script in scripts {
    let src = script
      .attributes
      .borrow()
      .get(LocalName::from("src"))
      .expect("script with src attribute has no src value")
      .to_string();

    // Rooted paths are made relative so that joining keeps them inside `dir`.
    let requested = PathBuf::from(src);
    let relative = if requested.has_root() {
      requested
        .strip_prefix("/")
        .expect("Tauri \"Isolation\" Pattern only supports relative or absolute (`/`) paths.")
        .to_path_buf()
    } else {
      requested
    };

    let contents =
      std::fs::read_to_string(dir.join(relative)).expect("unable to find isolation file");
    script.as_node().append(NodeRef::new_text(contents));

    script
      .attributes
      .borrow_mut()
      .remove(LocalName::from("src"));
  }
}

#[cfg(test)]
mod tests {
  use kuchiki::traits::*;

  /// The CSP meta tag ends up inside `<head>` whether or not the input declares one.
  #[test]
  fn csp() {
    let csp = "csp-string";
    let expected = format!(
      r#"<html><head><meta http-equiv="Content-Security-Policy" content="{csp}"></head><body></body></html>"#,
    );

    for html in &["<html><head></head></html>", "<html></html>"] {
      let document = kuchiki::parse_html().one(html.to_string());
      super::inject_csp(&document, csp);
      assert_eq!(document.to_string(), expected);
    }
  }
}