wasmtime_cache/
lib.rs

1use base64::Engine;
2use log::{debug, trace, warn};
3use serde::{Deserialize, Serialize};
4use sha2::{Digest, Sha256};
5use std::hash::Hash;
6use std::hash::Hasher;
7use std::io::Write;
8use std::path::{Path, PathBuf};
9use std::{fs, io};
10
11#[macro_use] // for tests
12mod config;
13mod worker;
14
15pub use config::{create_new_config, CacheConfig};
16use worker::Worker;
17
/// Module level cache entry.
///
/// Wraps an optional inner entry: it is `Some` when the supplied cache
/// configuration reports the cache as enabled and `None` otherwise, in which
/// case lookups skip the cache and call the caller's `compute` function
/// directly.
pub struct ModuleCacheEntry<'config>(Option<ModuleCacheEntryInner<'config>>);
20
/// State backing an enabled [`ModuleCacheEntry`].
struct ModuleCacheEntryInner<'config> {
    /// Directory holding the cache files; each entry's file path is this
    /// directory joined with the entry's hash string.
    root_path: PathBuf,
    /// Borrowed cache configuration, consulted for the compression level and
    /// notified after successful cache reads and writes.
    cache_config: &'config CacheConfig,
}
25
/// Adapter that lets `Hash` implementations feed their bytes into a SHA-256
/// digest; the digest is read back from the wrapped `Sha256` value rather
/// than through `Hasher::finish`.
struct Sha256Hasher(Sha256);
27
28impl<'config> ModuleCacheEntry<'config> {
29    /// Create the cache entry.
30    pub fn new(compiler_name: &str, cache_config: &'config CacheConfig) -> Self {
31        if cache_config.enabled() {
32            Self(Some(ModuleCacheEntryInner::new(
33                compiler_name,
34                cache_config,
35            )))
36        } else {
37            Self(None)
38        }
39    }
40
41    #[cfg(test)]
42    fn from_inner(inner: ModuleCacheEntryInner<'config>) -> Self {
43        Self(Some(inner))
44    }
45
46    /// Gets cached data if state matches, otherwise calls `compute`.
47    ///
48    /// Data is automatically serialized/deserialized with `bincode`.
49    pub fn get_data<T, U, E>(&self, state: T, compute: fn(&T) -> Result<U, E>) -> Result<U, E>
50    where
51        T: Hash,
52        U: Serialize + for<'a> Deserialize<'a>,
53    {
54        self.get_data_raw(
55            &state,
56            compute,
57            |_state, data| postcard::to_allocvec(data).ok(),
58            |_state, data| postcard::from_bytes(&data).ok(),
59        )
60    }
61
62    /// Gets cached data if state matches, otherwise calls `compute`.
63    ///
64    /// If the cache is disabled or no cached data is found then `compute` is
65    /// called to calculate the data. If the data was found in cache it is
66    /// passed to `deserialize`, which if successful will be the returned value.
67    /// When computed the `serialize` function is used to generate the bytes
68    /// from the returned value.
69    pub fn get_data_raw<T, U, E>(
70        &self,
71        state: &T,
72        // NOTE: These are function pointers instead of closures so that they
73        // don't accidentally close over something not accounted in the cache.
74        compute: fn(&T) -> Result<U, E>,
75        serialize: fn(&T, &U) -> Option<Vec<u8>>,
76        deserialize: fn(&T, Vec<u8>) -> Option<U>,
77    ) -> Result<U, E>
78    where
79        T: Hash,
80    {
81        let inner = match &self.0 {
82            Some(inner) => inner,
83            None => return compute(state),
84        };
85
86        let mut hasher = Sha256Hasher(Sha256::new());
87        state.hash(&mut hasher);
88        let hash: [u8; 32] = hasher.0.finalize().into();
89        // standard encoding uses '/' which can't be used for filename
90        let hash = base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(&hash);
91
92        if let Some(cached_val) = inner.get_data(&hash) {
93            if let Some(val) = deserialize(state, cached_val) {
94                let mod_cache_path = inner.root_path.join(&hash);
95                inner.cache_config.on_cache_get_async(&mod_cache_path); // call on success
96                return Ok(val);
97            }
98        }
99        let val_to_cache = compute(state)?;
100        if let Some(bytes) = serialize(state, &val_to_cache) {
101            if inner.update_data(&hash, &bytes).is_some() {
102                let mod_cache_path = inner.root_path.join(&hash);
103                inner.cache_config.on_cache_update_async(&mod_cache_path); // call on success
104            }
105        }
106        Ok(val_to_cache)
107    }
108}
109
110impl<'config> ModuleCacheEntryInner<'config> {
111    fn new(compiler_name: &str, cache_config: &'config CacheConfig) -> Self {
112        // If debug assertions are enabled then assume that we're some sort of
113        // local build. We don't want local builds to stomp over caches between
114        // builds, so just use a separate cache directory based on the mtime of
115        // our executable, which should roughly correlate with "you changed the
116        // source code so you get a different directory".
117        //
118        // Otherwise if this is a release build we use the `GIT_REV` env var
119        // which is either the git rev if installed from git or the crate
120        // version if installed from crates.io.
121        let compiler_dir = if cfg!(debug_assertions) {
122            fn self_mtime() -> Option<String> {
123                let path = std::env::current_exe().ok()?;
124                let metadata = path.metadata().ok()?;
125                let mtime = metadata.modified().ok()?;
126                Some(match mtime.duration_since(std::time::UNIX_EPOCH) {
127                    Ok(dur) => format!("{}", dur.as_millis()),
128                    Err(err) => format!("m{}", err.duration().as_millis()),
129                })
130            }
131            let self_mtime = self_mtime().unwrap_or("no-mtime".to_string());
132            format!(
133                "{comp_name}-{comp_ver}-{comp_mtime}",
134                comp_name = compiler_name,
135                comp_ver = env!("GIT_REV"),
136                comp_mtime = self_mtime,
137            )
138        } else {
139            format!(
140                "{comp_name}-{comp_ver}",
141                comp_name = compiler_name,
142                comp_ver = env!("GIT_REV"),
143            )
144        };
145        let root_path = cache_config.directory().join("modules").join(compiler_dir);
146
147        Self {
148            root_path,
149            cache_config,
150        }
151    }
152
153    fn get_data(&self, hash: &str) -> Option<Vec<u8>> {
154        let mod_cache_path = self.root_path.join(hash);
155        trace!("get_data() for path: {}", mod_cache_path.display());
156        let compressed_cache_bytes = fs::read(&mod_cache_path).ok()?;
157        let cache_bytes = zstd::decode_all(&compressed_cache_bytes[..])
158            .map_err(|err| warn!("Failed to decompress cached code: {}", err))
159            .ok()?;
160        Some(cache_bytes)
161    }
162
163    fn update_data(&self, hash: &str, serialized_data: &[u8]) -> Option<()> {
164        let mod_cache_path = self.root_path.join(hash);
165        trace!("update_data() for path: {}", mod_cache_path.display());
166        let compressed_data = zstd::encode_all(
167            &serialized_data[..],
168            self.cache_config.baseline_compression_level(),
169        )
170        .map_err(|err| warn!("Failed to compress cached code: {}", err))
171        .ok()?;
172
173        // Optimize syscalls: first, try writing to disk. It should succeed in most cases.
174        // Otherwise, try creating the cache directory and retry writing to the file.
175        if fs_write_atomic(&mod_cache_path, "mod", &compressed_data).is_ok() {
176            return Some(());
177        }
178
179        debug!(
180            "Attempting to create the cache directory, because \
181             failed to write cached code to disk, path: {}",
182            mod_cache_path.display(),
183        );
184
185        let cache_dir = mod_cache_path.parent().unwrap();
186        fs::create_dir_all(cache_dir)
187            .map_err(|err| {
188                warn!(
189                    "Failed to create cache directory, path: {}, message: {}",
190                    cache_dir.display(),
191                    err
192                )
193            })
194            .ok()?;
195
196        match fs_write_atomic(&mod_cache_path, "mod", &compressed_data) {
197            Ok(_) => Some(()),
198            Err(err) => {
199                warn!(
200                    "Failed to write file with rename, target path: {}, err: {}",
201                    mod_cache_path.display(),
202                    err
203                );
204                None
205            }
206        }
207    }
208}
209
210impl Hasher for Sha256Hasher {
211    fn finish(&self) -> u64 {
212        panic!("Sha256Hasher doesn't support finish!");
213    }
214
215    fn write(&mut self, bytes: &[u8]) {
216        self.0.update(bytes);
217    }
218}
219
/// Writes `contents` to `path` by first writing a staging file next to it
/// and then renaming that file onto `path`.
///
/// The staging file is `path` with its extension set to
/// `wip-atomic-write-{reason}`. It is opened with `create_new`, so the call
/// fails if a staging file with that name already exists.
///
/// Assumption: `path` is inside the cache directory, so sound OS-specific
/// exclusive file access is not needed. A staging file left behind by a
/// failed write is not removed here; the cleanup task is expected to do that
/// later.
///
/// # Errors
///
/// Returns the I/O error from creating the staging file, writing to it, or
/// renaming it onto `path`.
fn fs_write_atomic(path: &Path, reason: &str, contents: &[u8]) -> io::Result<()> {
    let staging_path = path.with_extension(format!("wip-atomic-write-{reason}"));
    {
        // atomic file creation (assumption: no one will open it without this flag)
        let mut staging_file = fs::OpenOptions::new()
            .create_new(true)
            .write(true)
            .open(&staging_path)?;
        staging_file.write_all(contents)?;
        // the staging file goes out of scope and is closed at the end of this block
    }
    // atomic file rename
    fs::rename(&staging_path, path)
}
233
234#[cfg(test)]
235mod tests;