gix_pack/cache/lru.rs

use super::DecodeEntry;

#[cfg(feature = "pack-cache-lru-dynamic")]
mod memory {
    use super::DecodeEntry;
    use crate::cache::set_vec_to_slice;
    use clru::WeightScale;
    use std::num::NonZeroUsize;

    struct Entry {
        data: Vec<u8>,
        kind: gix_object::Kind,
        compressed_size: usize,
    }

    type Key = (u32, u64);
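    /// Weighs cache entries by the size of their object data, so the configured capacity
    /// acts as a cap on object-data memory in bytes.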
    struct CustomScale;

    impl WeightScale<Key, Entry> for CustomScale {
        fn weight(&self, _key: &Key, value: &Entry) -> usize {
            value.data.len()
        }
    }

    /// An LRU cache with hash map backing and an eviction rule based on the memory usage for object data in bytes.
    pub struct MemoryCappedHashmap {
        inner: clru::CLruCache<Key, Entry, std::collections::hash_map::RandomState, CustomScale>,
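        /// Buffers of entries that were replaced or rejected, kept so `put()` can reuse their
        /// allocations instead of allocating anew.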
        free_list: Vec<Vec<u8>>,
        debug: gix_features::cache::Debug,
    }

    impl MemoryCappedHashmap {
        /// Return a new instance which evicts least-recently-used items once it holds more than
        /// `memory_cap_in_bytes` of object data.
        pub fn new(memory_cap_in_bytes: usize) -> MemoryCappedHashmap {
            MemoryCappedHashmap {
                inner: clru::CLruCache::with_config(
                    clru::CLruCacheConfig::new(NonZeroUsize::new(memory_cap_in_bytes).expect("non zero"))
                        .with_scale(CustomScale),
                ),
                free_list: Vec::new(),
                debug: gix_features::cache::Debug::new(format!("MemoryCappedHashmap({memory_cap_in_bytes}B)")),
            }
        }
    }

    impl DecodeEntry for MemoryCappedHashmap {
        fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: gix_object::Kind, compressed_size: usize) {
            self.debug.put();
            let Some(data) = set_vec_to_slice(self.free_list.pop().unwrap_or_default(), data) else {
                return;
            };
            let res = self.inner.put_with_weight(
                (pack_id, offset),
                Entry {
                    data,
                    kind,
                    compressed_size,
                },
            );
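            // Recycle the buffer of an entry that was replaced under the same key, or of one the
            // cache rejected outright, so a later `put()` can reuse its allocation.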
            match res {
                Ok(Some(previous_entry)) => self.free_list.push(previous_entry.data),
                Ok(None) => {}
                Err((_key, value)) => self.free_list.push(value.data),
            }
        }

        fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)> {
            let res = self.inner.get(&(pack_id, offset)).and_then(|e| {
                set_vec_to_slice(out, &e.data)?;
                Some((e.kind, e.compressed_size))
            });
            if res.is_some() {
                self.debug.hit();
            } else {
                self.debug.miss();
            }
            res
        }
    }
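
    #[cfg(test)]
    mod tests {
        use super::*;

        // A minimal usage sketch added for illustration, not part of the original test suite:
        // it drives the cache purely through the `DecodeEntry` implementation above.
        #[test]
        fn put_and_get_roundtrip() {
            let mut cache = MemoryCappedHashmap::new(64 * 1024);
            let mut out = Vec::new();

            // An unknown key is a miss and leaves `out` untouched.
            assert_eq!(cache.get(5, 100, &mut out), None);

            // A hit copies the cached data into `out` and returns the object kind
            // along with the recorded compressed size.
            cache.put(5, 100, b"hello", gix_object::Kind::Blob, 3);
            assert_eq!(cache.get(5, 100, &mut out), Some((gix_object::Kind::Blob, 3)));
            assert_eq!(out, b"hello".to_vec());
        }
    }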
}

#[cfg(feature = "pack-cache-lru-dynamic")]
pub use memory::MemoryCappedHashmap;

#[cfg(feature = "pack-cache-lru-static")]
mod _static {
    use super::DecodeEntry;
    use crate::cache::set_vec_to_slice;
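    /// An entry of the static LRU. The pack id and offset are stored inline because `uluru`
    /// looks entries up via a predicate rather than by key.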
    struct Entry {
        pack_id: u32,
        offset: u64,
        data: Vec<u8>,
        kind: gix_object::Kind,
        compressed_size: usize,
    }

    /// A cache using a least-recently-used implementation capable of storing the `SIZE` most recent objects.
    /// The cache must be small as the search is 'naive' and the underlying data structure is a linked list.
    /// Values of 64 seem to improve performance.
    pub struct StaticLinkedList<const SIZE: usize> {
        inner: uluru::LRUCache<Entry, SIZE>,
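        /// The buffer of the most recently evicted entry, kept so its allocation can be reused
        /// by the next `put()`.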
        last_evicted: Vec<u8>,
        debug: gix_features::cache::Debug,
        /// The number of bytes we are currently holding, taking into account the capacities of all Vecs we keep.
        mem_used: usize,
        /// The total amount of memory we should be able to hold with all entries combined.
        mem_limit: usize,
    }

    impl<const SIZE: usize> StaticLinkedList<SIZE> {
        /// Create a new list with a memory limit of `mem_limit` in bytes. If 0, there is no memory limit.
        pub fn new(mem_limit: usize) -> Self {
            StaticLinkedList {
                inner: Default::default(),
                last_evicted: Vec::new(),
                debug: gix_features::cache::Debug::new(format!("StaticLinkedList<{SIZE}>")),
                mem_used: 0,
                mem_limit: if mem_limit == 0 { usize::MAX } else { mem_limit },
            }
        }
    }

    impl<const SIZE: usize> Default for StaticLinkedList<SIZE> {
        fn default() -> Self {
            Self::new(96 * 1024 * 1024)
        }
    }

    impl<const SIZE: usize> DecodeEntry for StaticLinkedList<SIZE> {
        fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: gix_object::Kind, compressed_size: usize) {
            // We cannot possibly hold this much.
            if data.len() > self.mem_limit {
                return;
            }
            // If we could hold it but are at the limit, all we can do is make space.
            let mem_free = self.mem_limit - self.mem_used;
            if data.len() > mem_free {
                // prefer freeing the free-list instead of clearing our cache
                let free_list_cap = self.last_evicted.len();
                self.last_evicted = Vec::new();
                // still not enough? clear everything
                if data.len() > mem_free + free_list_cap {
                    self.inner.clear();
                    self.mem_used = 0;
                } else {
                    self.mem_used -= free_list_cap;
                }
            }
            self.debug.put();
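            // Reuse the most recently evicted buffer: its capacity leaves the accounting here,
            // and the (possibly larger) capacity after copying `data` in is added back below.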
            let mut v = std::mem::take(&mut self.last_evicted);
            self.mem_used -= v.capacity();
            if set_vec_to_slice(&mut v, data).is_none() {
                return;
            }
            self.mem_used += v.capacity();
            if let Some(previous) = self.inner.insert(Entry {
                offset,
                pack_id,
                data: v,
                kind,
                compressed_size,
            }) {
                // No need to adjust capacity as we already counted it.
                self.last_evicted = previous.data;
            }
        }

        fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)> {
            let res = self.inner.lookup(|e: &mut Entry| {
                if e.pack_id == pack_id && e.offset == offset {
                    set_vec_to_slice(&mut *out, &e.data)?;
                    Some((e.kind, e.compressed_size))
                } else {
                    None
                }
            });
            if res.is_some() {
                self.debug.hit();
            } else {
                self.debug.miss();
            }
            res
        }
    }

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn no_limit() {
            let c = StaticLinkedList::<10>::new(0);
            assert_eq!(
                c.mem_limit,
                usize::MAX,
                "zero is automatically turned into a large limit that is equivalent to unlimited"
            );
        }

        #[test]
        fn journey() {
            let mut c = StaticLinkedList::<10>::new(100);
            assert_eq!(c.mem_limit, 100);
            assert_eq!(c.mem_used, 0);

            // enough memory for normal operation
            let mut last_mem_used = 0;
            for _ in 0..10 {
                c.put(0, 0, &[0], gix_object::Kind::Blob, 1);
                assert!(c.mem_used > last_mem_used);
                last_mem_used = c.mem_used;
            }
            assert_eq!(c.mem_used, 80, "there is a minimal vec size");
            assert_eq!(c.inner.len(), 10);
            assert_eq!(c.last_evicted.len(), 0);

            c.put(0, 0, &(0..20).collect::<Vec<_>>(), gix_object::Kind::Blob, 1);
            assert_eq!(c.inner.len(), 10);
            assert_eq!(c.mem_used, 80 + 20);
            assert_eq!(c.last_evicted.len(), 1);

            c.put(0, 0, &(0..50).collect::<Vec<_>>(), gix_object::Kind::Blob, 1);
            assert_eq!(c.inner.len(), 1, "cache clearance was necessary to make room");
            assert_eq!(c.last_evicted.len(), 0, "the free list was cleared");
            assert_eq!(c.mem_used, 50);

            c.put(0, 0, &(0..101).collect::<Vec<_>>(), gix_object::Kind::Blob, 1);
            assert_eq!(
                c.inner.len(),
                1,
                "objects that won't ever fit within the memory limit are ignored"
            );
        }
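
        // An illustrative round-trip added for clarity, not part of the original suite: a key
        // that was `put` before yields a hit that copies the data into `out` and reports kind
        // and compressed size, while unknown keys are misses.
        #[test]
        fn put_and_get_roundtrip() {
            let mut c = StaticLinkedList::<4>::new(0);
            let mut out = Vec::new();
            assert_eq!(c.get(1, 2, &mut out), None, "an empty cache always misses");

            c.put(1, 2, b"abc", gix_object::Kind::Blob, 3);
            assert_eq!(c.get(1, 2, &mut out), Some((gix_object::Kind::Blob, 3)));
            assert_eq!(out, b"abc".to_vec());
        }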
    }
}

#[cfg(feature = "pack-cache-lru-static")]
pub use _static::StaticLinkedList;