// pingora_cache/predictor.rs
// Copyright 2024 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Cacheability Predictor
use crate::hashtable::{ConcurrentLruCache, LruShard};
/// Predicate over the `&'static str` payload of a `NoCacheReason::Custom`.
/// Returning `true` tells the predictor to skip remembering the current key
/// as uncacheable (see `Predictor::mark_uncacheable`).
pub type CustomReasonPredicate = fn(&'static str) -> bool;
/// Cacheability Predictor
///
/// Remembers previously uncacheable assets.
/// Allows bypassing cache / cache lock early based on historical precedent.
///
/// NOTE: to simply avoid caching requests with certain characteristics,
/// add checks in request_cache_filter to avoid enabling cache in the first place.
/// The predictor's bypass mechanism handles cases where the request _looks_ cacheable
/// but its previous responses suggest otherwise. The request _could_ be cacheable in the future.
pub struct Predictor<const N_SHARDS: usize> {
    // Sharded LRU of hashed cache keys previously seen to be uncacheable.
    // Only presence matters, hence the `()` value type.
    uncacheable_keys: ConcurrentLruCache<(), N_SHARDS>,
    // Optional user predicate that vetoes remembering certain
    // `NoCacheReason::Custom` reasons as uncacheable.
    skip_custom_reasons_fn: Option<CustomReasonPredicate>,
}
use crate::{key::CacheHashKey, CacheKey, NoCacheReason};
use log::debug;
/// The cache predictor trait.
///
/// This trait allows user defined predictor to replace [Predictor].
pub trait CacheablePredictor {
    /// Return true if likely cacheable, false if likely not.
    fn cacheable_prediction(&self, key: &CacheKey) -> bool;
    /// Mark cacheable to allow next request to cache.
    /// Returns false if the key was already marked cacheable.
    // NOTE(review): the provided [Predictor] impl returns `true` when the key
    // was NOT remembered as uncacheable (i.e. already cacheable), which is the
    // opposite of the sentence above — confirm the intended return convention.
    fn mark_cacheable(&self, key: &CacheKey) -> bool;
    /// Mark uncacheable to actively bypass cache on the next request.
    /// May skip marking on certain NoCacheReasons.
    /// Returns None if we skipped marking uncacheable.
    /// Returns Some(false) if the key was already marked uncacheable.
    fn mark_uncacheable(&self, key: &CacheKey, reason: NoCacheReason) -> Option<bool>;
}
// This particular bit of `where [LruShard...; N]: Default` nonsense arises from
// ConcurrentLruCache needing this trait bound, which in turns arises from the Rust
// compiler not being able to guarantee that all array sizes N implement `Default`.
// See https://github.com/rust-lang/rust/issues/61415
impl<const N_SHARDS: usize> Predictor<N_SHARDS>
where
    [LruShard<()>; N_SHARDS]: Default,
{
    /// Build a predictor remembering up to `N_SHARDS * shard_capacity`
    /// uncacheable cache keys in total.
    ///
    /// - `shard_capacity`: how many uncacheable keys each LRU shard retains.
    /// - `skip_custom_reasons_fn`: optional predicate consulted by
    ///   `mark_uncacheable` for `Custom` `NoCacheReason`s. When it returns
    ///   true, the current cache key is not remembered as uncacheable (so
    ///   cache is not bypassed on the next request).
    pub fn new(
        shard_capacity: usize,
        skip_custom_reasons_fn: Option<CustomReasonPredicate>,
    ) -> Predictor<N_SHARDS> {
        Self {
            // Value type is inferred from the field declaration.
            uncacheable_keys: ConcurrentLruCache::new(shard_capacity),
            skip_custom_reasons_fn,
        }
    }
}
impl<const N_SHARDS: usize> CacheablePredictor for Predictor<N_SHARDS>
where
    [LruShard<()>; N_SHARDS]: Default,
{
    fn cacheable_prediction(&self, key: &CacheKey) -> bool {
        // The variance key is not considered: this check happens before cache
        // lookup, where the variance is still unknown.
        let hashed = u128::from_be_bytes(key.primary_bin()); // endianness doesn't matter
        // The LRU is only touched by the mark_* functions; the caller is
        // assumed to report the response's actual cacheability later.
        !self.uncacheable_keys.read(hashed).contains(&hashed)
    }

    fn mark_cacheable(&self, key: &CacheKey) -> bool {
        // Variance key ignored for the same reason as cacheable_prediction():
        // it is unknown before cache lookup.
        let hashed = u128::from_be_bytes(key.primary_bin());
        let shard = self.uncacheable_keys.get(hashed);
        // Probe under the cheap read lock first; the read guard is dropped at
        // the end of this statement, before any write lock is taken.
        let was_uncacheable = shard.read().contains(&hashed);
        if !was_uncacheable {
            // Not in the uncacheable list, nothing to remove.
            return true;
        }
        shard.write().pop(&hashed);
        debug!("bypassed request became cacheable");
        false
    }

    fn mark_uncacheable(&self, key: &CacheKey, reason: NoCacheReason) -> Option<bool> {
        use NoCacheReason::*;
        // Only some reasons are worth remembering; transient failures
        // (e.g. InternalError) say nothing about the asset itself.
        match reason {
            // CacheLockGiveUp: the writer will set OriginNotCache (if applicable)
            // readers don't need to do it
            NeverEnabled | StorageError | InternalError | Deferred | CacheLockGiveUp
            | CacheLockTimeout => return None,
            // Let the user-supplied predicate veto specific Custom reasons.
            Custom(reason) if self.skip_custom_reasons_fn.map_or(false, |f| f(reason)) => {
                return None
            }
            // Everything else is remembered as uncacheable.
            Custom(_) | OriginNotCache | ResponseTooLarge => {}
        }
        // Variance key ignored for the same reason as cacheable_prediction().
        let hashed = u128::from_be_bytes(key.primary_bin());
        let mut shard = self.uncacheable_keys.get(hashed).write();
        // put() returns the previous value, so None means the key is new.
        let newly_marked = shard.put(hashed, ()).is_none();
        if newly_marked {
            debug!("request marked uncacheable");
        }
        Some(newly_marked)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_mark_cacheability() {
        let p = Predictor::<1>::new(10, None);
        let key = CacheKey::new("a", "b", "c");
        // No history yet: predicted cacheable.
        assert!(p.cacheable_prediction(&key));
        // Internal / storage errors are not remembered.
        p.mark_uncacheable(&key, NoCacheReason::InternalError);
        assert!(p.cacheable_prediction(&key));
        p.mark_uncacheable(&key, NoCacheReason::StorageError);
        assert!(p.cacheable_prediction(&key));
        // The origin explicitly refused caching: remembered.
        p.mark_uncacheable(&key, NoCacheReason::OriginNotCache);
        assert!(!p.cacheable_prediction(&key));
        // Marking cacheable clears the record.
        p.mark_cacheable(&key);
        assert!(p.cacheable_prediction(&key));
    }

    #[test]
    fn test_custom_skip_predicate() {
        let p = Predictor::<1>::new(
            10,
            Some(|custom_reason| matches!(custom_reason, "Skipping")),
        );
        let key = CacheKey::new("a", "b", "c");
        // No history yet: predicted cacheable.
        assert!(p.cacheable_prediction(&key));
        // The default skip reasons still apply with a custom predicate.
        p.mark_uncacheable(&key, NoCacheReason::InternalError);
        assert!(p.cacheable_prediction(&key));
        // Custom reasons not matched by the predicate are remembered.
        p.mark_uncacheable(&key, NoCacheReason::Custom("DontCacheMe"));
        assert!(!p.cacheable_prediction(&key));
        let key = CacheKey::new("a", "c", "d");
        assert!(p.cacheable_prediction(&key));
        // The predicate-matched custom reason is skipped.
        p.mark_uncacheable(&key, NoCacheReason::Custom("Skipping"));
        assert!(p.cacheable_prediction(&key));
    }

    #[test]
    fn test_mark_uncacheable_lru() {
        let p = Predictor::<1>::new(3, None);
        let key1 = CacheKey::new("a", "b", "c");
        p.mark_uncacheable(&key1, NoCacheReason::OriginNotCache);
        assert!(!p.cacheable_prediction(&key1));
        let key2 = CacheKey::new("a", "bc", "c");
        p.mark_uncacheable(&key2, NoCacheReason::OriginNotCache);
        assert!(!p.cacheable_prediction(&key2));
        let key3 = CacheKey::new("a", "cd", "c");
        p.mark_uncacheable(&key3, NoCacheReason::OriginNotCache);
        assert!(!p.cacheable_prediction(&key3));
        // Re-marking key1 promotes it to most-recently-used.
        p.mark_uncacheable(&key1, NoCacheReason::OriginNotCache);
        // Capacity is 3, so inserting key4 evicts the LRU entry (key2).
        let key4 = CacheKey::new("a", "de", "c");
        p.mark_uncacheable(&key4, NoCacheReason::OriginNotCache);
        assert!(!p.cacheable_prediction(&key4));
        // key1 survived thanks to the promotion above.
        assert!(!p.cacheable_prediction(&key1));
        // key2 was evicted.
        assert!(p.cacheable_prediction(&key2));
        assert!(!p.cacheable_prediction(&key3));
        assert!(!p.cacheable_prediction(&key4));
    }
}