use {
    crate::{
        perf_libs,
        recycler::{RecyclerX, Reset},
    },
    log::{debug, trace},
    rand::{seq::SliceRandom, Rng},
    rayon::prelude::*,
    serde::{Deserialize, Serialize},
    std::{
        ops::{Index, IndexMut},
        os::raw::c_int,
        slice::{Iter, IterMut, SliceIndex},
        sync::Weak,
    },
};

const CUDA_SUCCESS: c_int = 0;

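// Registers the entire backing allocation of `mem` as page-locked (pinned)
// host memory via cudaHostRegister, allowing the GPU to DMA to and from it
// directly. No-op when the perf libs are not loaded.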
fn pin<T>(mem: &mut Vec<T>) {
    if let Some(api) = perf_libs::api() {
        use std::{ffi::c_void, mem::size_of};

        let ptr = mem.as_mut_ptr();
        let size = mem.capacity().saturating_mul(size_of::<T>());
        let err = unsafe { (api.cuda_host_register)(ptr as *mut c_void, size, 0) };
        assert!(
            err == CUDA_SUCCESS,
            "cudaHostRegister error: {err} ptr: {ptr:?} bytes: {size}"
        );
    }
}

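// Releases a previous cudaHostRegister mapping for `mem`. No-op when the
// perf libs are not loaded.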
fn unpin<T>(mem: *mut T) {
    if let Some(api) = perf_libs::api() {
        use std::ffi::c_void;

        let err = unsafe { (api.cuda_host_unregister)(mem as *mut c_void) };
        assert!(
            err == CUDA_SUCCESS,
            "cudaHostUnregister returned: {err} ptr: {mem:?}"
        );
    }
}

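/// A `Vec<T>` wrapper whose backing memory can be page-locked ("pinned") for
/// faster host/device transfers when the CUDA perf libs are available. The
/// buffer is re-pinned whenever a reallocation moves it.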
#[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct PinnedVec<T: Default + Clone + Sized> {
    x: Vec<T>,
    pinned: bool,
    pinnable: bool,
    #[serde(skip)]
    recycler: Weak<RecyclerX<PinnedVec<T>>>,
}

impl<T: Default + Clone + Sized> Reset for PinnedVec<T> {
    fn reset(&mut self) {
        self.resize(0, T::default());
    }
    fn warm(&mut self, size_hint: usize) {
        self.set_pinnable();
        self.resize(size_hint, T::default());
    }
    fn set_recycler(&mut self, recycler: Weak<RecyclerX<Self>>) {
        self.recycler = recycler;
    }
}

impl<T: Clone + Default + Sized> From<PinnedVec<T>> for Vec<T> {
    fn from(mut pinned_vec: PinnedVec<T>) -> Self {
        if pinned_vec.pinned {
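            // With a live recycler, keep the pinned allocation reusable: Drop
            // will hand it back to the recycler pool, so give the caller a
            // copy of the data instead of unpinning the buffer.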
            if pinned_vec.recycler.strong_count() != 0 {
                return pinned_vec.x.clone();
            }
            unpin(pinned_vec.x.as_mut_ptr());
            pinned_vec.pinned = false;
        }
        pinned_vec.pinnable = false;
        pinned_vec.recycler = Weak::default();
        std::mem::take(&mut pinned_vec.x)
    }
}

impl<'a, T: Clone + Default + Sized> IntoIterator for &'a PinnedVec<T> {
    type Item = &'a T;
    type IntoIter = Iter<'a, T>;

    fn into_iter(self) -> Self::IntoIter {
        self.x.iter()
    }
}

impl<T: Clone + Default + Sized, I: SliceIndex<[T]>> Index<I> for PinnedVec<T> {
    type Output = I::Output;

    #[inline]
    fn index(&self, index: I) -> &Self::Output {
        &self.x[index]
    }
}

impl<T: Clone + Default + Sized, I: SliceIndex<[T]>> IndexMut<I> for PinnedVec<T> {
    #[inline]
    fn index_mut(&mut self, index: I) -> &mut Self::Output {
        &mut self.x[index]
    }
}

impl<T: Clone + Default + Sized> PinnedVec<T> {
    pub fn iter(&self) -> Iter<'_, T> {
        self.x.iter()
    }

    pub fn iter_mut(&mut self) -> IterMut<'_, T> {
        self.x.iter_mut()
    }

    pub fn capacity(&self) -> usize {
        self.x.capacity()
    }
}

impl<'a, T: Clone + Send + Sync + Default + Sized> IntoParallelIterator for &'a PinnedVec<T> {
    type Iter = rayon::slice::Iter<'a, T>;
    type Item = &'a T;
    fn into_par_iter(self) -> Self::Iter {
        self.x.par_iter()
    }
}

impl<'a, T: Clone + Send + Sync + Default + Sized> IntoParallelIterator for &'a mut PinnedVec<T> {
    type Iter = rayon::slice::IterMut<'a, T>;
    type Item = &'a mut T;
    fn into_par_iter(self) -> Self::Iter {
        self.x.par_iter_mut()
    }
}

impl<T: Clone + Default + Sized> PinnedVec<T> {
    pub fn reserve(&mut self, size: usize) {
        self.x.reserve(size);
    }

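    /// Reserves capacity for at least `size` elements and pins the (possibly
    /// reallocated) buffer, unpinning the old one first if needed.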
    pub fn reserve_and_pin(&mut self, size: usize) {
        if self.x.capacity() < size {
            if self.pinned {
                unpin(self.x.as_mut_ptr());
                self.pinned = false;
            }
            self.x.reserve(size);
        }
        self.set_pinnable();
        if !self.pinned {
            pin(&mut self.x);
            self.pinned = true;
        }
    }

    pub fn set_pinnable(&mut self) {
        self.pinnable = true;
    }

    pub fn copy_from_slice(&mut self, data: &[T])
    where
        T: Copy,
    {
        self.x.copy_from_slice(data);
    }

    pub fn from_vec(source: Vec<T>) -> Self {
        Self {
            x: source,
            pinned: false,
            pinnable: false,
            recycler: Weak::default(),
        }
    }

    pub fn with_capacity(capacity: usize) -> Self {
        Self::from_vec(Vec::with_capacity(capacity))
    }

    pub fn is_empty(&self) -> bool {
        self.x.is_empty()
    }

    pub fn len(&self) -> usize {
        self.x.len()
    }

    pub fn as_ptr(&self) -> *const T {
        self.x.as_ptr()
    }

    pub fn as_mut_ptr(&mut self) -> *mut T {
        self.x.as_mut_ptr()
    }

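    // Called before any operation that may grow the vector: if the buffer is
    // pinned and about to be reallocated (and therefore moved), unpin it
    // first. Returns the old pointer and capacity so `check_ptr` can detect
    // whether a move actually happened.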
    fn prepare_realloc(&mut self, new_size: usize) -> (*mut T, usize) {
        let old_ptr = self.x.as_mut_ptr();
        let old_capacity = self.x.capacity();
        if self.pinned && self.x.capacity() < new_size {
            unpin(old_ptr);
            self.pinned = false;
        }
        (old_ptr, old_capacity)
    }

    pub fn push(&mut self, x: T) {
        let (old_ptr, old_capacity) = self.prepare_realloc(self.x.len().saturating_add(1));
        self.x.push(x);
        self.check_ptr(old_ptr, old_capacity, "push");
    }

    pub fn truncate(&mut self, size: usize) {
        self.x.truncate(size);
    }

    pub fn resize(&mut self, size: usize, elem: T) {
        let (old_ptr, old_capacity) = self.prepare_realloc(size);
        self.x.resize(size, elem);
        self.check_ptr(old_ptr, old_capacity, "resize");
    }

    pub fn append(&mut self, other: &mut Vec<T>) {
        let (old_ptr, old_capacity) =
            self.prepare_realloc(self.x.len().saturating_add(other.len()));
        self.x.append(other);
        self.check_ptr(old_ptr, old_capacity, "append");
    }

    pub fn append_pinned(&mut self, other: &mut Self) {
        let (old_ptr, old_capacity) =
            self.prepare_realloc(self.x.len().saturating_add(other.len()));
        self.x.append(&mut other.x);
        self.check_ptr(old_ptr, old_capacity, "append_pinned");
    }

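    /// Forces the length of the vector to `size`.
    ///
    /// # Safety
    ///
    /// As with `Vec::set_len`, the caller must ensure that `size` does not
    /// exceed the capacity and that the elements at `..size` are initialized.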
    pub unsafe fn set_len(&mut self, size: usize) {
        self.x.set_len(size);
    }

    pub fn shuffle<R: Rng>(&mut self, rng: &mut R) {
        self.x.shuffle(rng)
    }

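    // Called after a potentially reallocating operation: if the buffer moved
    // or grew, re-pin it at its new location.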
    fn check_ptr(&mut self, old_ptr: *mut T, old_capacity: usize, from: &'static str) {
        let api = perf_libs::api();
        if api.is_some()
            && self.pinnable
            && (self.x.as_ptr() != old_ptr || self.x.capacity() != old_capacity)
        {
            if self.pinned {
                unpin(old_ptr);
            }

            trace!(
                "pinning from check_ptr old: {} size: {} from: {}",
                old_capacity,
                self.x.capacity(),
                from
            );
            pin(&mut self.x);
            self.pinned = true;
        }
    }
}

impl<T: Clone + Default + Sized> Clone for PinnedVec<T> {
    fn clone(&self) -> Self {
        let mut x = self.x.clone();
        let pinned = if self.pinned {
            pin(&mut x);
            true
        } else {
            false
        };
        debug!(
            "clone PinnedVec: size: {} pinned?: {} pinnable?: {}",
            self.x.capacity(),
            self.pinned,
            self.pinnable
        );
        Self {
            x,
            pinned,
            pinnable: self.pinnable,
            recycler: self.recycler.clone(),
        }
    }
}

impl<T: Sized + Default + Clone> Drop for PinnedVec<T> {
    fn drop(&mut self) {
        if let Some(recycler) = self.recycler.upgrade() {
            recycler.recycle(std::mem::take(self));
        } else if self.pinned {
            unpin(self.x.as_mut_ptr());
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_pinned_vec() {
        let mut mem = PinnedVec::with_capacity(10);
        mem.set_pinnable();
        mem.push(50);
        mem.resize(2, 10);
        assert_eq!(mem[0], 50);
        assert_eq!(mem[1], 10);
        assert_eq!(mem.len(), 2);
        assert!(!mem.is_empty());
        let mut iter = mem.iter();
        assert_eq!(*iter.next().unwrap(), 50);
        assert_eq!(*iter.next().unwrap(), 10);
        assert_eq!(iter.next(), None);
    }
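
    // Sanity check for `from_vec`, `append`, and the
    // `From<PinnedVec<T>> for Vec<T>` conversion; with no perf libs loaded
    // this path never touches pinned memory.
    #[test]
    fn test_pinned_vec_into_vec() {
        let mut mem = PinnedVec::from_vec(vec![1, 2, 3]);
        mem.append(&mut vec![4, 5]);
        assert_eq!(mem.len(), 5);
        let vec: Vec<i32> = mem.into();
        assert_eq!(vec, vec![1, 2, 3, 4, 5]);
    }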
}