solana_perf/
cuda_runtime.rs

// Module for CUDA-related helper functions and wrappers.
//
// cudaHostRegister/cudaHostUnregister -
//    APIs for page-pinning memory. The CUDA driver/hardware cannot overlap
//    copies from host memory to GPU memory unless the memory is page-pinned,
//    i.e. it cannot be paged out to disk. The CUDA driver provides these
//    interfaces to pin and unpin memory.
7
8use {
9    crate::{
10        perf_libs,
11        recycler::{RecyclerX, Reset},
12    },
13    rand::{seq::SliceRandom, Rng},
14    rayon::prelude::*,
15    serde::{Deserialize, Serialize},
16    std::{
17        ops::{Index, IndexMut},
18        os::raw::c_int,
19        slice::{Iter, IterMut, SliceIndex},
20        sync::Weak,
21    },
22};
23
// Status code that `pin` and `unpin` require from the CUDA host
// register/unregister calls; any other value trips their assertions.
const CUDA_SUCCESS: c_int = 0;
25
26fn pin<T>(mem: &mut Vec<T>) {
27    if let Some(api) = perf_libs::api() {
28        use std::{ffi::c_void, mem::size_of};
29
30        let ptr = mem.as_mut_ptr();
31        let size = mem.capacity().saturating_mul(size_of::<T>());
32        let err = unsafe {
33            (api.cuda_host_register)(ptr as *mut c_void, size, /*flags=*/ 0)
34        };
35        assert!(
36            err == CUDA_SUCCESS,
37            "cudaHostRegister error: {err} ptr: {ptr:?} bytes: {size}"
38        );
39    }
40}
41
42fn unpin<T>(mem: *mut T) {
43    if let Some(api) = perf_libs::api() {
44        use std::ffi::c_void;
45
46        let err = unsafe { (api.cuda_host_unregister)(mem as *mut c_void) };
47        assert!(
48            err == CUDA_SUCCESS,
49            "cudaHostUnregister returned: {err} ptr: {mem:?}"
50        );
51    }
52}
53
/// A vector wrapper where the underlying memory can be
/// page-pinned. Controlled by flags in case user only wants
/// to pin in certain circumstances.
#[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct PinnedVec<T: Default + Clone + Sized> {
    /// Backing storage; the accessors in this file forward to it.
    x: Vec<T>,
    /// Set to `true` right after `x` has been passed to `pin`, and back to
    /// `false` right after its buffer has been passed to `unpin`.
    pinned: bool,
    /// When set, `check_ptr` pins the buffer again after an operation changed
    /// its address or capacity.
    pinnable: bool,
    /// Weak handle to a recycler. On drop, the vector is handed to it if the
    /// handle can still be upgraded. Skipped by serde.
    #[serde(skip)]
    recycler: Weak<RecyclerX<PinnedVec<T>>>,
}
66
67impl<T: Default + Clone + Sized> Reset for PinnedVec<T> {
68    fn reset(&mut self) {
69        self.resize(0, T::default());
70    }
71    fn warm(&mut self, size_hint: usize) {
72        self.set_pinnable();
73        self.resize(size_hint, T::default());
74    }
75    fn set_recycler(&mut self, recycler: Weak<RecyclerX<Self>>) {
76        self.recycler = recycler;
77    }
78}
79
80impl<T: Clone + Default + Sized> From<PinnedVec<T>> for Vec<T> {
81    fn from(mut pinned_vec: PinnedVec<T>) -> Self {
82        if pinned_vec.pinned {
83            // If the vector is pinned and has a recycler, just return a clone
84            // so that the next allocation of a PinnedVec will recycle an
85            // already pinned one.
86            if pinned_vec.recycler.strong_count() != 0 {
87                return pinned_vec.x.clone();
88            }
89            unpin(pinned_vec.x.as_mut_ptr());
90            pinned_vec.pinned = false;
91        }
92        pinned_vec.pinnable = false;
93        pinned_vec.recycler = Weak::default();
94        std::mem::take(&mut pinned_vec.x)
95    }
96}
97
98impl<'a, T: Clone + Default + Sized> IntoIterator for &'a PinnedVec<T> {
99    type Item = &'a T;
100    type IntoIter = Iter<'a, T>;
101
102    fn into_iter(self) -> Self::IntoIter {
103        self.x.iter()
104    }
105}
106
107impl<T: Clone + Default + Sized, I: SliceIndex<[T]>> Index<I> for PinnedVec<T> {
108    type Output = I::Output;
109
110    #[inline]
111    fn index(&self, index: I) -> &Self::Output {
112        &self.x[index]
113    }
114}
115
116impl<T: Clone + Default + Sized, I: SliceIndex<[T]>> IndexMut<I> for PinnedVec<T> {
117    #[inline]
118    fn index_mut(&mut self, index: I) -> &mut Self::Output {
119        &mut self.x[index]
120    }
121}
122
123impl<T: Clone + Default + Sized> PinnedVec<T> {
124    pub fn iter(&self) -> Iter<'_, T> {
125        self.x.iter()
126    }
127
128    pub fn iter_mut(&mut self) -> IterMut<'_, T> {
129        self.x.iter_mut()
130    }
131
132    pub fn capacity(&self) -> usize {
133        self.x.capacity()
134    }
135}
136
137impl<'a, T: Clone + Send + Sync + Default + Sized> IntoParallelIterator for &'a PinnedVec<T> {
138    type Iter = rayon::slice::Iter<'a, T>;
139    type Item = &'a T;
140    fn into_par_iter(self) -> Self::Iter {
141        self.x.par_iter()
142    }
143}
144
145impl<'a, T: Clone + Send + Sync + Default + Sized> IntoParallelIterator for &'a mut PinnedVec<T> {
146    type Iter = rayon::slice::IterMut<'a, T>;
147    type Item = &'a mut T;
148    fn into_par_iter(self) -> Self::Iter {
149        self.x.par_iter_mut()
150    }
151}
152
153impl<T: Clone + Default + Sized> PinnedVec<T> {
154    pub fn reserve(&mut self, size: usize) {
155        self.x.reserve(size);
156    }
157
158    pub fn reserve_and_pin(&mut self, size: usize) {
159        if self.x.capacity() < size {
160            if self.pinned {
161                unpin(self.x.as_mut_ptr());
162                self.pinned = false;
163            }
164            self.x.reserve(size);
165        }
166        self.set_pinnable();
167        if !self.pinned {
168            pin(&mut self.x);
169            self.pinned = true;
170        }
171    }
172
173    pub fn set_pinnable(&mut self) {
174        self.pinnable = true;
175    }
176
177    pub fn copy_from_slice(&mut self, data: &[T])
178    where
179        T: Copy,
180    {
181        self.x.copy_from_slice(data);
182    }
183
184    pub fn from_vec(source: Vec<T>) -> Self {
185        Self {
186            x: source,
187            pinned: false,
188            pinnable: false,
189            recycler: Weak::default(),
190        }
191    }
192
193    pub fn with_capacity(capacity: usize) -> Self {
194        Self::from_vec(Vec::with_capacity(capacity))
195    }
196
197    pub fn is_empty(&self) -> bool {
198        self.x.is_empty()
199    }
200
201    pub fn len(&self) -> usize {
202        self.x.len()
203    }
204
205    pub fn as_ptr(&self) -> *const T {
206        self.x.as_ptr()
207    }
208
209    pub fn as_mut_ptr(&mut self) -> *mut T {
210        self.x.as_mut_ptr()
211    }
212
213    fn prepare_realloc(&mut self, new_size: usize) -> (*mut T, usize) {
214        let old_ptr = self.x.as_mut_ptr();
215        let old_capacity = self.x.capacity();
216        // Predict realloc and unpin.
217        if self.pinned && self.x.capacity() < new_size {
218            unpin(old_ptr);
219            self.pinned = false;
220        }
221        (old_ptr, old_capacity)
222    }
223
224    pub fn push(&mut self, x: T) {
225        let (old_ptr, old_capacity) = self.prepare_realloc(self.x.len().saturating_add(1));
226        self.x.push(x);
227        self.check_ptr(old_ptr, old_capacity, "push");
228    }
229
230    pub fn truncate(&mut self, size: usize) {
231        self.x.truncate(size);
232    }
233
234    pub fn resize(&mut self, size: usize, elem: T) {
235        let (old_ptr, old_capacity) = self.prepare_realloc(size);
236        self.x.resize(size, elem);
237        self.check_ptr(old_ptr, old_capacity, "resize");
238    }
239
240    pub fn append(&mut self, other: &mut Vec<T>) {
241        let (old_ptr, old_capacity) =
242            self.prepare_realloc(self.x.len().saturating_add(other.len()));
243        self.x.append(other);
244        self.check_ptr(old_ptr, old_capacity, "resize");
245    }
246
247    pub fn append_pinned(&mut self, other: &mut Self) {
248        let (old_ptr, old_capacity) =
249            self.prepare_realloc(self.x.len().saturating_add(other.len()));
250        self.x.append(&mut other.x);
251        self.check_ptr(old_ptr, old_capacity, "resize");
252    }
253
254    /// Forces the length of the vector to `new_len`.
255    ///
256    /// This is a low-level operation that maintains none of the normal
257    /// invariants of the type. Normally changing the length of a vector
258    /// is done using one of the safe operations instead, such as
259    /// [`truncate`], [`resize`], [`extend`], or [`clear`].
260    ///
261    /// [`truncate`]: Vec::truncate
262    /// [`resize`]: Vec::resize
263    /// [`extend`]: Extend::extend
264    /// [`clear`]: Vec::clear
265    ///
266    /// # Safety
267    ///
268    /// - `new_len` must be less than or equal to [`capacity()`].
269    /// - The elements at `old_len..new_len` must be initialized.
270    ///
271    /// [`capacity()`]: Vec::capacity
272    ///
273    pub unsafe fn set_len(&mut self, size: usize) {
274        self.x.set_len(size);
275    }
276
277    pub fn shuffle<R: Rng>(&mut self, rng: &mut R) {
278        self.x.shuffle(rng)
279    }
280
281    fn check_ptr(&mut self, old_ptr: *mut T, old_capacity: usize, from: &'static str) {
282        let api = perf_libs::api();
283        if api.is_some()
284            && self.pinnable
285            && (self.x.as_ptr() != old_ptr || self.x.capacity() != old_capacity)
286        {
287            if self.pinned {
288                unpin(old_ptr);
289            }
290
291            trace!(
292                "pinning from check_ptr old: {} size: {} from: {}",
293                old_capacity,
294                self.x.capacity(),
295                from
296            );
297            pin(&mut self.x);
298            self.pinned = true;
299        }
300    }
301}
302
303impl<T: Clone + Default + Sized> Clone for PinnedVec<T> {
304    fn clone(&self) -> Self {
305        let mut x = self.x.clone();
306        let pinned = if self.pinned {
307            pin(&mut x);
308            true
309        } else {
310            false
311        };
312        debug!(
313            "clone PinnedVec: size: {} pinned?: {} pinnable?: {}",
314            self.x.capacity(),
315            self.pinned,
316            self.pinnable
317        );
318        Self {
319            x,
320            pinned,
321            pinnable: self.pinnable,
322            recycler: self.recycler.clone(),
323        }
324    }
325}
326
327impl<T: Sized + Default + Clone> Drop for PinnedVec<T> {
328    fn drop(&mut self) {
329        if let Some(recycler) = self.recycler.upgrade() {
330            recycler.recycle(std::mem::take(self));
331        } else if self.pinned {
332            unpin(self.x.as_mut_ptr());
333        }
334    }
335}
336
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: push, resize, indexing, length and iteration on a
    /// pinnable vector.
    #[test]
    fn test_pinned_vec() {
        let mut pinned = PinnedVec::with_capacity(10);
        pinned.set_pinnable();
        pinned.push(50);
        pinned.resize(2, 10);

        assert_eq!(pinned[0], 50);
        assert_eq!(pinned[1], 10);
        assert_eq!(pinned.len(), 2);
        assert!(!pinned.is_empty());

        let mut values = pinned.iter();
        assert_eq!(values.next(), Some(&50));
        assert_eq!(values.next(), Some(&10));
        assert_eq!(values.next(), None);
    }
}