// wasmtime_cache/config.rs

1//! Module for configuring the cache system.
2
3use super::Worker;
4use anyhow::{anyhow, bail, Context, Result};
5use directories_next::ProjectDirs;
6use log::{trace, warn};
7use serde::{
8    de::{self, Deserializer},
9    Deserialize,
10};
11use std::fmt::Debug;
12use std::fs;
13use std::path::{Path, PathBuf};
14use std::sync::atomic::{AtomicUsize, Ordering::SeqCst};
15use std::sync::Arc;
16use std::time::Duration;
17
// wrapped, so we have named section in config,
// also, for possible future compatibility
#[derive(serde_derive::Deserialize, Debug)]
#[serde(deny_unknown_fields)]
struct Config {
    // The `[cache]` section of the TOML configuration file.
    cache: CacheConfig,
}
25
/// Global configuration for how the cache is managed
///
/// Every setting is optional in the TOML file; `from_file` validates values
/// and fills in defaults after deserialization. The serde `rename`s map the
/// snake_case fields to the kebab-case keys used in the config file.
#[derive(serde_derive::Deserialize, Debug, Clone)]
#[serde(deny_unknown_fields)]
pub struct CacheConfig {
    // Whether the cache system is enabled at all.
    enabled: bool,
    // Root directory for cached files; defaulted and canonicalized by
    // `validate_directory_or_default`.
    directory: Option<PathBuf>,
    // Size of the background worker's event queue; accepts SI suffixes
    // (e.g. "16", "1K").
    #[serde(
        default,
        rename = "worker-event-queue-size",
        deserialize_with = "deserialize_si_prefix"
    )]
    worker_event_queue_size: Option<u64>,
    // zstd level used when a module is first cached.
    #[serde(rename = "baseline-compression-level")]
    baseline_compression_level: Option<i32>,
    // zstd level used when recompressing frequently-used files.
    #[serde(rename = "optimized-compression-level")]
    optimized_compression_level: Option<i32>,
    // Usage count after which a file is recompressed at the optimized level.
    #[serde(
        default,
        rename = "optimized-compression-usage-counter-threshold",
        deserialize_with = "deserialize_si_prefix"
    )]
    optimized_compression_usage_counter_threshold: Option<u64>,
    // How often cleanup runs; accepts "s"/"m"/"h"/"d" suffixes.
    #[serde(
        default,
        rename = "cleanup-interval",
        deserialize_with = "deserialize_duration"
    )]
    cleanup_interval: Option<Duration>,
    // Time budget for a single recompression task.
    #[serde(
        default,
        rename = "optimizing-compression-task-timeout",
        deserialize_with = "deserialize_duration"
    )]
    optimizing_compression_task_timeout: Option<Duration>,
    // Tolerated mtime skew before a cache file is considered "from the future".
    #[serde(
        default,
        rename = "allowed-clock-drift-for-files-from-future",
        deserialize_with = "deserialize_duration"
    )]
    allowed_clock_drift_for_files_from_future: Option<Duration>,
    // Soft cap on the number of cached files.
    #[serde(
        default,
        rename = "file-count-soft-limit",
        deserialize_with = "deserialize_si_prefix"
    )]
    file_count_soft_limit: Option<u64>,
    // Soft cap on total cache size; accepts both SI (K/M/...) and binary
    // (Ki/Mi/...) suffixes.
    #[serde(
        default,
        rename = "files-total-size-soft-limit",
        deserialize_with = "deserialize_disk_space"
    )]
    files_total_size_soft_limit: Option<u64>,
    // When deleting, shrink the file count to this percent of the soft limit.
    #[serde(
        default,
        rename = "file-count-limit-percent-if-deleting",
        deserialize_with = "deserialize_percent"
    )]
    file_count_limit_percent_if_deleting: Option<u8>,
    // When deleting, shrink the total size to this percent of the soft limit.
    #[serde(
        default,
        rename = "files-total-size-limit-percent-if-deleting",
        deserialize_with = "deserialize_percent"
    )]
    files_total_size_limit_percent_if_deleting: Option<u8>,

    // Background worker handle; populated by `spawn_worker`, never from TOML.
    #[serde(skip)]
    worker: Option<Worker>,
    // Hit/miss counters, shared between clones of this config via `Arc`.
    #[serde(skip)]
    state: Arc<CacheState>,
}
96
// Runtime cache statistics, shared across clones of `CacheConfig` via `Arc`.
#[derive(Default, Debug)]
struct CacheState {
    // Incremented by `on_cache_get_async`.
    hits: AtomicUsize,
    // Incremented by `on_cache_update_async`.
    misses: AtomicUsize,
}
102
103/// Creates a new configuration file at specified path, or default path if None is passed.
104/// Fails if file already exists.
105pub fn create_new_config<P: AsRef<Path> + Debug>(config_file: Option<P>) -> Result<PathBuf> {
106    trace!("Creating new config file, path: {:?}", config_file);
107
108    let config_file = match config_file {
109        Some(path) => path.as_ref().to_path_buf(),
110        None => default_config_path()?,
111    };
112
113    if config_file.exists() {
114        bail!(
115            "Configuration file '{}' already exists.",
116            config_file.display()
117        );
118    }
119
120    let parent_dir = config_file
121        .parent()
122        .ok_or_else(|| anyhow!("Invalid cache config path: {}", config_file.display()))?;
123
124    fs::create_dir_all(parent_dir).with_context(|| {
125        format!(
126            "Failed to create config directory, config path: {}",
127            config_file.display(),
128        )
129    })?;
130
131    let content = "\
132# Comment out certain settings to use default values.
133# For more settings, please refer to the documentation:
134# https://bytecodealliance.github.io/wasmtime/cli-cache.html
135
136[cache]
137enabled = true
138";
139
140    fs::write(&config_file, content).with_context(|| {
141        format!(
142            "Failed to flush config to the disk, path: {}",
143            config_file.display(),
144        )
145    })?;
146
147    Ok(config_file.to_path_buf())
148}
149
// permitted levels from: https://docs.rs/zstd/0.4.28+zstd.1.4.3/zstd/stream/write/struct.Encoder.html
const ZSTD_COMPRESSION_LEVELS: std::ops::RangeInclusive<i32> = 0..=21;

// Default settings, you're welcome to tune them!
// TODO: what do we want to warn users about?

// At the moment of writing, the modules couldn't depend on one another,
// so we have at most one module per wasmtime instance
// if changed, update cli-cache.md
const DEFAULT_WORKER_EVENT_QUEUE_SIZE: u64 = 0x10;
const WORKER_EVENT_QUEUE_SIZE_WARNING_THRESHOLD: u64 = 3;
// should be quick and provide good enough compression
// if changed, update cli-cache.md
const DEFAULT_BASELINE_COMPRESSION_LEVEL: i32 = zstd::DEFAULT_COMPRESSION_LEVEL;
// should provide significantly better compression than baseline
// if changed, update cli-cache.md
const DEFAULT_OPTIMIZED_COMPRESSION_LEVEL: i32 = 20;
// shouldn't be too low to avoid recompressing too many files
// if changed, update cli-cache.md
const DEFAULT_OPTIMIZED_COMPRESSION_USAGE_COUNTER_THRESHOLD: u64 = 0x100;
// if changed, update cli-cache.md
const DEFAULT_CLEANUP_INTERVAL: Duration = Duration::from_secs(60 * 60);
// if changed, update cli-cache.md
const DEFAULT_OPTIMIZING_COMPRESSION_TASK_TIMEOUT: Duration = Duration::from_secs(30 * 60);
// the default assumes problems with timezone configuration on network share + some clock drift
// please notice 24 timezones = max 23h difference between some of them
// if changed, update cli-cache.md
const DEFAULT_ALLOWED_CLOCK_DRIFT_FOR_FILES_FROM_FUTURE: Duration =
    Duration::from_secs(60 * 60 * 24);
// if changed, update cli-cache.md
const DEFAULT_FILE_COUNT_SOFT_LIMIT: u64 = 0x10_000;
// if changed, update cli-cache.md
const DEFAULT_FILES_TOTAL_SIZE_SOFT_LIMIT: u64 = 1024 * 1024 * 512;
// if changed, update cli-cache.md
const DEFAULT_FILE_COUNT_LIMIT_PERCENT_IF_DELETING: u8 = 70;
// if changed, update cli-cache.md
const DEFAULT_FILES_TOTAL_SIZE_LIMIT_PERCENT_IF_DELETING: u8 = 70;
187
// Platform-specific project directories for wasmtime; `None` when the
// underlying crate cannot determine a valid home directory for the user.
fn project_dirs() -> Option<ProjectDirs> {
    ProjectDirs::from("", "BytecodeAlliance", "wasmtime")
}
191
192fn default_config_path() -> Result<PathBuf> {
193    match project_dirs() {
194        Some(dirs) => Ok(dirs.config_dir().join("config.toml")),
195        None => bail!("config file not specified and failed to get the default"),
196    }
197}
198
// Deserializers of our custom formats
// can be replaced with const generics later
//
// Generates an `Option`-aware serde deserializer. The input string is split
// at the first non-numeric character into a number and a unit suffix, and
// `$body` interprets the pair (returning `None` to signal invalid input).
macro_rules! generate_deserializer {
    ($name:ident($numname:ident: $numty:ty, $unitname:ident: &str) -> $retty:ty {$body:expr}) => {
        fn $name<'de, D>(deserializer: D) -> Result<$retty, D::Error>
        where
            D: Deserializer<'de>,
        {
            // A missing field deserializes to `Ok(None)` rather than an error.
            let text = Option::<String>::deserialize(deserializer)?;
            let text = match text {
                None => return Ok(None),
                Some(text) => text,
            };
            let text = text.trim();
            // e.g. "25Mi" splits into ("25", "Mi"); no suffix yields ("25", "").
            let split_point = text.find(|c: char| !c.is_numeric());
            let (num, unit) = split_point.map_or_else(|| (text, ""), |p| text.split_at(p));
            // Run the unit-specific body; parse failure or an unknown unit
            // both fall through to the single custom error below.
            let deserialized = (|| {
                let $numname = num.parse::<$numty>().ok()?;
                let $unitname = unit.trim();
                $body
            })();
            if deserialized.is_some() {
                Ok(deserialized)
            } else {
                Err(de::Error::custom(
                    "Invalid value, please refer to the documentation",
                ))
            }
        }
    };
}
230
231generate_deserializer!(deserialize_duration(num: u64, unit: &str) -> Option<Duration> {
232    match unit {
233        "s" => Some(Duration::from_secs(num)),
234        "m" => Some(Duration::from_secs(num * 60)),
235        "h" => Some(Duration::from_secs(num * 60 * 60)),
236        "d" => Some(Duration::from_secs(num * 60 * 60 * 24)),
237        _ => None,
238    }
239});
240
// Parses a count with an optional decimal SI prefix (K/M/G/T/P, powers of
// 1000). `checked_mul` turns overflow into a deserialization error.
generate_deserializer!(deserialize_si_prefix(num: u64, unit: &str) -> Option<u64> {
    match unit {
        "" => Some(num),
        "K" => num.checked_mul(1_000),
        "M" => num.checked_mul(1_000_000),
        "G" => num.checked_mul(1_000_000_000),
        "T" => num.checked_mul(1_000_000_000_000),
        "P" => num.checked_mul(1_000_000_000_000_000),
        _ => None,
    }
});
252
// Parses a byte size with an optional decimal (K/M/G/T/P, powers of 1000)
// or binary (Ki/Mi/Gi/Ti/Pi, powers of 1024) prefix. `checked_mul` turns
// overflow into a deserialization error.
generate_deserializer!(deserialize_disk_space(num: u64, unit: &str) -> Option<u64> {
    match unit {
        "" => Some(num),
        "K" => num.checked_mul(1_000),
        "Ki" => num.checked_mul(1u64 << 10),
        "M" => num.checked_mul(1_000_000),
        "Mi" => num.checked_mul(1u64 << 20),
        "G" => num.checked_mul(1_000_000_000),
        "Gi" => num.checked_mul(1u64 << 30),
        "T" => num.checked_mul(1_000_000_000_000),
        "Ti" => num.checked_mul(1u64 << 40),
        "P" => num.checked_mul(1_000_000_000_000_000),
        "Pi" => num.checked_mul(1u64 << 50),
        _ => None,
    }
});
269
// Parses a percentage; the "%" suffix is mandatory. Range checking (<= 100)
// happens later, in the corresponding `validate_*` method.
generate_deserializer!(deserialize_percent(num: u8, unit: &str) -> Option<u8> {
    match unit {
        "%" => Some(num),
        _ => None,
    }
});
276
// Panic message used by getters that require an enabled, validated cache.
static CACHE_IMPROPER_CONFIG_ERROR_MSG: &str =
    "Cache system should be enabled and all settings must be validated or defaulted";
279
// Generates a getter that unwraps a validated `Option` setting, panicking
// with `CACHE_IMPROPER_CONFIG_ERROR_MSG` if validation never ran.
//
// NOTE(review): `$setting` is not interpolated inside the `///` doc comment
// (doc strings are literal tokens in `macro_rules!`), so the generated
// rustdoc literally reads "Returns `$setting`." — confirm whether
// `#[doc = concat!(...)]` is worth adopting here.
macro_rules! generate_setting_getter {
    ($setting:ident: $setting_type:ty) => {
        /// Returns `$setting`.
        ///
        /// Panics if the cache is disabled.
        pub fn $setting(&self) -> $setting_type {
            self.$setting.expect(CACHE_IMPROPER_CONFIG_ERROR_MSG)
        }
    };
}
290
impl CacheConfig {
    // Panicking getters for every validated setting; see
    // `generate_setting_getter` for the generated shape.
    generate_setting_getter!(worker_event_queue_size: u64);
    generate_setting_getter!(baseline_compression_level: i32);
    generate_setting_getter!(optimized_compression_level: i32);
    generate_setting_getter!(optimized_compression_usage_counter_threshold: u64);
    generate_setting_getter!(cleanup_interval: Duration);
    generate_setting_getter!(optimizing_compression_task_timeout: Duration);
    generate_setting_getter!(allowed_clock_drift_for_files_from_future: Duration);
    generate_setting_getter!(file_count_soft_limit: u64);
    generate_setting_getter!(files_total_size_soft_limit: u64);
    generate_setting_getter!(file_count_limit_percent_if_deleting: u8);
    generate_setting_getter!(files_total_size_limit_percent_if_deleting: u8);

    /// Returns true if and only if the cache is enabled.
    pub fn enabled(&self) -> bool {
        self.enabled
    }

    /// Returns path to the cache directory.
    ///
    /// Panics if the cache is disabled.
    pub fn directory(&self) -> &PathBuf {
        self.directory
            .as_ref()
            .expect(CACHE_IMPROPER_CONFIG_ERROR_MSG)
    }

    /// Creates a new set of configuration which represents a disabled cache
    pub fn new_cache_disabled() -> Self {
        Self {
            enabled: false,
            directory: None,
            worker_event_queue_size: None,
            baseline_compression_level: None,
            optimized_compression_level: None,
            optimized_compression_usage_counter_threshold: None,
            cleanup_interval: None,
            optimizing_compression_task_timeout: None,
            allowed_clock_drift_for_files_from_future: None,
            file_count_soft_limit: None,
            files_total_size_soft_limit: None,
            file_count_limit_percent_if_deleting: None,
            files_total_size_limit_percent_if_deleting: None,
            worker: None,
            state: Arc::new(CacheState::default()),
        }
    }

    // Disabled config with `enabled` flipped on; the remaining `None` fields
    // are filled in by the `validate_*_or_default` methods.
    fn new_cache_enabled_template() -> Self {
        let mut conf = Self::new_cache_disabled();
        conf.enabled = true;
        conf
    }

    /// Parses cache configuration from the file specified
    ///
    /// Every setting is then validated (invalid values are errors) or
    /// defaulted, and the background worker is spawned when the cache is
    /// enabled.
    pub fn from_file(config_file: Option<&Path>) -> Result<Self> {
        let mut config = Self::load_and_parse_file(config_file)?;

        // validate values and fill in defaults
        config.validate_directory_or_default()?;
        config.validate_worker_event_queue_size_or_default();
        config.validate_baseline_compression_level_or_default()?;
        // NB: must run after the baseline validation — it compares the levels.
        config.validate_optimized_compression_level_or_default()?;
        config.validate_optimized_compression_usage_counter_threshold_or_default();
        config.validate_cleanup_interval_or_default();
        config.validate_optimizing_compression_task_timeout_or_default();
        config.validate_allowed_clock_drift_for_files_from_future_or_default();
        config.validate_file_count_soft_limit_or_default();
        config.validate_files_total_size_soft_limit_or_default();
        config.validate_file_count_limit_percent_if_deleting_or_default()?;
        config.validate_files_total_size_limit_percent_if_deleting_or_default()?;
        config.spawn_worker();

        Ok(config)
    }

    // Starts the background cache worker; no-op when the cache is disabled.
    fn spawn_worker(&mut self) {
        if self.enabled {
            self.worker = Some(Worker::start_new(self));
        }
    }

    // Accessor for the background worker. Only valid on an enabled config;
    // `spawn_worker` guarantees `worker` is `Some` in that case.
    pub(super) fn worker(&self) -> &Worker {
        assert!(self.enabled);
        self.worker.as_ref().unwrap()
    }

    /// Returns the number of cache hits seen so far
    pub fn cache_hits(&self) -> usize {
        self.state.hits.load(SeqCst)
    }

    /// Returns the number of cache misses seen so far
    pub fn cache_misses(&self) -> usize {
        self.state.misses.load(SeqCst)
    }

    // Records a cache hit and notifies the worker about the accessed file.
    pub(crate) fn on_cache_get_async(&self, path: impl AsRef<Path>) {
        self.state.hits.fetch_add(1, SeqCst);
        self.worker().on_cache_get_async(path)
    }

    // Records a cache miss and notifies the worker about the written file.
    pub(crate) fn on_cache_update_async(&self, path: impl AsRef<Path>) {
        self.state.misses.fetch_add(1, SeqCst);
        self.worker().on_cache_update_async(path)
    }

    // Reads and parses the TOML config. A missing *default* config file falls
    // back to the enabled template; a missing user-specified file errors out
    // via the failed read below.
    fn load_and_parse_file(config_file: Option<&Path>) -> Result<Self> {
        // get config file path
        let (config_file, user_custom_file) = match config_file {
            Some(path) => (path.to_path_buf(), true),
            None => (default_config_path()?, false),
        };

        // read config, or use default one
        let entity_exists = config_file.exists();
        match (entity_exists, user_custom_file) {
            (false, false) => Ok(Self::new_cache_enabled_template()),
            _ => {
                let contents = fs::read_to_string(&config_file).context(format!(
                    "failed to read config file: {}",
                    config_file.display()
                ))?;
                let config = toml::from_str::<Config>(&contents).context(format!(
                    "failed to parse config file: {}",
                    config_file.display()
                ))?;
                Ok(config.cache)
            }
        }
    }

    // Defaults the cache directory to the platform cache dir, then requires
    // it to be absolute, creates it, and canonicalizes it.
    fn validate_directory_or_default(&mut self) -> Result<()> {
        if self.directory.is_none() {
            match project_dirs() {
                Some(proj_dirs) => self.directory = Some(proj_dirs.cache_dir().to_path_buf()),
                None => {
                    bail!("Cache directory not specified and failed to get the default");
                }
            }
        }

        // On Windows, if we want long paths, we need '\\?\' prefix, but it doesn't work
        // with relative paths. One way to get absolute path (the only one?) is to use
        // fs::canonicalize, but it requires that given path exists. The extra advantage
        // of this method is fact that the method prepends '\\?\' on Windows.
        let cache_dir = self.directory.as_ref().unwrap();

        if !cache_dir.is_absolute() {
            bail!(
                "Cache directory path has to be absolute, path: {}",
                cache_dir.display(),
            );
        }

        // Directory must exist before canonicalization (see comment above).
        fs::create_dir_all(cache_dir).context(format!(
            "failed to create cache directory: {}",
            cache_dir.display()
        ))?;
        let canonical = fs::canonicalize(cache_dir).context(format!(
            "failed to canonicalize cache directory: {}",
            cache_dir.display()
        ))?;
        self.directory = Some(canonical);
        Ok(())
    }

    // Defaults the queue size; only warns (does not error) on small values.
    fn validate_worker_event_queue_size_or_default(&mut self) {
        if self.worker_event_queue_size.is_none() {
            self.worker_event_queue_size = Some(DEFAULT_WORKER_EVENT_QUEUE_SIZE);
        }

        if self.worker_event_queue_size.unwrap() < WORKER_EVENT_QUEUE_SIZE_WARNING_THRESHOLD {
            warn!("Detected small worker event queue size. Some messages might be lost.");
        }
    }

    // Defaults the baseline zstd level and bounds-checks it.
    fn validate_baseline_compression_level_or_default(&mut self) -> Result<()> {
        if self.baseline_compression_level.is_none() {
            self.baseline_compression_level = Some(DEFAULT_BASELINE_COMPRESSION_LEVEL);
        }

        if !ZSTD_COMPRESSION_LEVELS.contains(&self.baseline_compression_level.unwrap()) {
            bail!(
                "Invalid baseline compression level: {} not in {:#?}",
                self.baseline_compression_level.unwrap(),
                ZSTD_COMPRESSION_LEVELS
            );
        }
        Ok(())
    }

    // assumption: baseline compression level has been verified
    fn validate_optimized_compression_level_or_default(&mut self) -> Result<()> {
        if self.optimized_compression_level.is_none() {
            self.optimized_compression_level = Some(DEFAULT_OPTIMIZED_COMPRESSION_LEVEL);
        }

        let opt_lvl = self.optimized_compression_level.unwrap();
        let base_lvl = self.baseline_compression_level.unwrap();

        if !ZSTD_COMPRESSION_LEVELS.contains(&opt_lvl) {
            bail!(
                "Invalid optimized compression level: {} not in {:#?}",
                opt_lvl,
                ZSTD_COMPRESSION_LEVELS
            );
        }

        // Optimized recompression must not be weaker than the baseline.
        if opt_lvl < base_lvl {
            bail!(
                "Invalid optimized compression level is lower than baseline: {} < {}",
                opt_lvl,
                base_lvl
            );
        }
        Ok(())
    }

    // Default-only: any u64 value is acceptable.
    fn validate_optimized_compression_usage_counter_threshold_or_default(&mut self) {
        if self.optimized_compression_usage_counter_threshold.is_none() {
            self.optimized_compression_usage_counter_threshold =
                Some(DEFAULT_OPTIMIZED_COMPRESSION_USAGE_COUNTER_THRESHOLD);
        }
    }

    // Default-only: any duration is acceptable.
    fn validate_cleanup_interval_or_default(&mut self) {
        if self.cleanup_interval.is_none() {
            self.cleanup_interval = Some(DEFAULT_CLEANUP_INTERVAL);
        }
    }

    // Default-only: any duration is acceptable.
    fn validate_optimizing_compression_task_timeout_or_default(&mut self) {
        if self.optimizing_compression_task_timeout.is_none() {
            self.optimizing_compression_task_timeout =
                Some(DEFAULT_OPTIMIZING_COMPRESSION_TASK_TIMEOUT);
        }
    }

    // Default-only: any duration is acceptable.
    fn validate_allowed_clock_drift_for_files_from_future_or_default(&mut self) {
        if self.allowed_clock_drift_for_files_from_future.is_none() {
            self.allowed_clock_drift_for_files_from_future =
                Some(DEFAULT_ALLOWED_CLOCK_DRIFT_FOR_FILES_FROM_FUTURE);
        }
    }

    // Default-only: any u64 value is acceptable.
    fn validate_file_count_soft_limit_or_default(&mut self) {
        if self.file_count_soft_limit.is_none() {
            self.file_count_soft_limit = Some(DEFAULT_FILE_COUNT_SOFT_LIMIT);
        }
    }

    // Default-only: any u64 value is acceptable.
    fn validate_files_total_size_soft_limit_or_default(&mut self) {
        if self.files_total_size_soft_limit.is_none() {
            self.files_total_size_soft_limit = Some(DEFAULT_FILES_TOTAL_SIZE_SOFT_LIMIT);
        }
    }

    // Defaults the percentage and enforces the 0-100 range (the deserializer
    // only checks the "%" suffix, not the range).
    fn validate_file_count_limit_percent_if_deleting_or_default(&mut self) -> Result<()> {
        if self.file_count_limit_percent_if_deleting.is_none() {
            self.file_count_limit_percent_if_deleting =
                Some(DEFAULT_FILE_COUNT_LIMIT_PERCENT_IF_DELETING);
        }

        let percent = self.file_count_limit_percent_if_deleting.unwrap();
        if percent > 100 {
            bail!(
                "Invalid files count limit percent if deleting: {} not in range 0-100%",
                percent
            );
        }
        Ok(())
    }

    // Defaults the percentage and enforces the 0-100 range.
    fn validate_files_total_size_limit_percent_if_deleting_or_default(&mut self) -> Result<()> {
        if self.files_total_size_limit_percent_if_deleting.is_none() {
            self.files_total_size_limit_percent_if_deleting =
                Some(DEFAULT_FILES_TOTAL_SIZE_LIMIT_PERCENT_IF_DELETING);
        }

        let percent = self.files_total_size_limit_percent_if_deleting.unwrap();
        if percent > 100 {
            bail!(
                "Invalid files total size limit percent if deleting: {} not in range 0-100%",
                percent
            );
        }
        Ok(())
    }
}
581
// Unit tests live in a separate file; `#[macro_use]` makes the module's
// helper macros available to sibling test code.
#[cfg(test)]
#[macro_use]
pub mod tests;