wgpu_hal/vulkan/mod.rs
/*!
# Vulkan API internals.

## Stack memory

Ash expects slices, which we don't generally have available.
We cope with this requirement with a combination of the following
approaches (see the sketch after the list):
 - temporarily allocating `Vec` on heap, where overhead is permitted
 - growing temporary local storage
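
A minimal sketch of the second approach, collecting handles into an
`ArrayVec` so the slice handed to `ash` points into local storage rather
than a heap allocation (the capacity of 4 is an illustrative bound, not a
constant from this module):

```skip
use arrayvec::ArrayVec;
use ash::vk;

fn collect_waits(sems: &[vk::Semaphore]) -> ArrayVec<vk::Semaphore, 4> {
    // `take(4)` guards the fixed capacity; `ArrayVec` panics on overflow.
    sems.iter().copied().take(4).collect()
}
```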

## Framebuffers and Render passes

Render passes are cached on the device and kept forever.

Framebuffers are also cached on the device, but they are removed when
any of the image views they reference is destroyed.
If the device supports image-less framebuffers,
then the actual views are excluded from the framebuffer key
(see the cache sketch below).
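
A minimal sketch of the render pass cache lookup; `create_render_pass`
stands in for the real creation logic and is not a function in this module:

```skip
use std::collections::hash_map::Entry;

let mut cache = device.render_passes.lock();
let raw_pass = match cache.entry(key) {
    Entry::Occupied(e) => *e.get(),
    Entry::Vacant(e) => {
        let pass = create_render_pass(device, e.key())?;
        *e.insert(pass)
    }
};
```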

## Fences

If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.

!*/

mod adapter;
mod command;
mod conv;
mod device;
mod instance;
mod sampler;

use std::{
    borrow::Borrow,
    collections::HashSet,
    ffi::{CStr, CString},
    fmt, mem,
    num::NonZeroU32,
    sync::Arc,
};

use arrayvec::ArrayVec;
use ash::{ext, khr, vk};
use parking_lot::{Mutex, RwLock};
use wgt::InternalCounter;

const MILLIS_TO_NANOS: u64 = 1_000_000;
const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;

#[derive(Clone, Debug)]
pub struct Api;

impl crate::Api for Api {
    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;
    type PipelineCache = PipelineCache;

    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}

crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);

struct DebugUtils {
    extension: ext::debug_utils::Instance,
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}

pub struct DebugUtilsCreateInfo {
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    callback_data: Box<DebugUtilsMessengerUserData>,
}

#[derive(Debug)]
/// Properties of the validation layer that the debug utils messenger
/// needs for its workarounds.
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}

/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties related to the validation layer, if present.
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// Whether the OBS layer is present. OBS never increments the version of
    /// its layer, so there's no reason to track the version.
    has_obs_layer: bool,
}

pub struct InstanceShared {
    raw: ash::Instance,
    extensions: Vec<&'static CStr>,
    drop_guard: Option<crate::DropGuard>,
    flags: wgt::InstanceFlags,
    debug_utils: Option<DebugUtils>,
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    entry: ash::Entry,
    has_nv_optimus: bool,
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// This is the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,
}

pub struct Instance {
    shared: Arc<InstanceShared>,
}

/// The semaphores needed to use one image in a swapchain.
#[derive(Debug)]
struct SwapchainImageSemaphores {
    /// A semaphore that is signaled when this image is safe for us to modify.
    ///
    /// When [`vkAcquireNextImageKHR`] returns the index of the next swapchain
    /// image that we should use, that image may actually still be in use by the
    /// presentation engine, and is not yet safe to modify. However, that
    /// function does accept a semaphore that it will signal when the image is
    /// indeed safe to begin messing with.
    ///
    /// This semaphore is:
    ///
    /// - waited for by the first queue submission to operate on this image
    ///   since it was acquired, and
    ///
    /// - signaled by [`vkAcquireNextImageKHR`] when the acquired image is ready
    ///   for us to use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    acquire: vk::Semaphore,

    /// True if the next command submission operating on this image should wait
    /// for [`acquire`].
    ///
    /// We must wait for `acquire` before drawing to this swapchain image, but
    /// because `wgpu-hal` queue submissions are always strongly ordered, only
    /// the first submission that works with a swapchain image actually needs to
    /// wait. We set this flag when this image is acquired, and clear it the
    /// first time it's passed to [`Queue::submit`] as a surface texture.
    ///
    /// [`acquire`]: SwapchainImageSemaphores::acquire
    /// [`Queue::submit`]: crate::Queue::submit
    should_wait_for_acquire: bool,

    /// A pool of semaphores for ordering presentation after drawing.
    ///
    /// The first [`present_index`] semaphores in this vector are:
    ///
    /// - all waited on by the call to [`vkQueuePresentKHR`] that presents this
    ///   image, and
    ///
    /// - each signaled by some [`vkQueueSubmit`] queue submission that draws to
    ///   this image, when the submission finishes execution.
    ///
    /// This vector accumulates one semaphore per submission that writes to this
    /// image. This is awkward, but hard to avoid: [`vkQueuePresentKHR`]
    /// requires a semaphore to order it with respect to drawing commands, and
    /// we can't attach new completion semaphores to a command submission after
    /// it's been submitted. This means that, at submission time, we must create
    /// the semaphore we might need if the caller's next action is to enqueue a
    /// presentation of this image.
    ///
    /// An alternative strategy would be for presentation to enqueue an empty
    /// submit, ordered relative to other submits in the usual way, and
    /// signaling a single presentation semaphore. But we suspect that submits
    /// are usually expensive enough, and semaphores usually cheap enough, that
    /// performance-sensitive users will avoid making many submits, so that the
    /// cost of accumulated semaphores will usually be less than the cost of an
    /// additional submit.
    ///
    /// Only the first [`present_index`] semaphores in the vector are actually
    /// going to be signalled by submitted commands, and need to be waited for
    /// by the next present call. Any semaphores beyond that index were created
    /// for prior presents and are simply being retained for recycling.
    ///
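    /// A worked example of one acquire/draw/present cycle (hypothetical
    /// state, with two submissions drawing to this image):
    ///
    /// ```skip
    /// // after acquire:        present = [s0, s1], present_index = 0
    /// // submit #1 signals s0: present = [s0, s1], present_index = 1
    /// // submit #2 signals s1: present = [s0, s1], present_index = 2
    /// // present waits on [s0, s1] and resets present_index to 0;
    /// // s0 and s1 are retained for recycling by the next cycle.
    /// ```
    ///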
    /// [`present_index`]: SwapchainImageSemaphores::present_index
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
    present: Vec<vk::Semaphore>,

    /// The number of semaphores in [`present`] to be signalled for this submission.
    ///
    /// [`present`]: SwapchainImageSemaphores::present
    present_index: usize,

    /// The fence value of the last command submission that wrote to this image.
    ///
    /// The next time we try to acquire this image, we'll block until
    /// this submission finishes, proving that [`acquire`] is ready to
    /// pass to `vkAcquireNextImageKHR` again.
    ///
    /// [`acquire`]: SwapchainImageSemaphores::acquire
    previously_used_submission_index: crate::FenceValue,
}

impl SwapchainImageSemaphores {
    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        Ok(Self {
            acquire: device.new_binary_semaphore()?,
            should_wait_for_acquire: true,
            present: Vec::new(),
            present_index: 0,
            previously_used_submission_index: 0,
        })
    }

    fn set_used_fence_value(&mut self, value: crate::FenceValue) {
        self.previously_used_submission_index = value;
    }

    /// Return the semaphore that commands drawing to this image should wait for, if any.
    ///
    /// This only returns `Some` once per acquisition; see
    /// [`SwapchainImageSemaphores::should_wait_for_acquire`] for details.
    fn get_acquire_wait_semaphore(&mut self) -> Option<vk::Semaphore> {
        if self.should_wait_for_acquire {
            self.should_wait_for_acquire = false;
            Some(self.acquire)
        } else {
            None
        }
    }

    /// Return a semaphore that a submission that writes to this image should
    /// signal when it's done.
    ///
    /// See [`SwapchainImageSemaphores::present`] for details.
    fn get_submit_signal_semaphore(
        &mut self,
        device: &DeviceShared,
    ) -> Result<vk::Semaphore, crate::DeviceError> {
        // Try to recycle a semaphore we created for a previous presentation.
        let sem = match self.present.get(self.present_index) {
            Some(sem) => *sem,
            None => {
                let sem = device.new_binary_semaphore()?;
                self.present.push(sem);
                sem
            }
        };

        self.present_index += 1;

        Ok(sem)
    }

    /// Return the semaphores that a presentation of this image should wait on.
    ///
    /// Return a slice of semaphores that the call to [`vkQueuePresentKHR`] that
    /// ends this image's acquisition should wait for. See
    /// [`SwapchainImageSemaphores::present`] for details.
    ///
    /// Reset `self` to be ready for the next acquisition cycle.
    ///
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    fn get_present_wait_semaphores(&mut self) -> &[vk::Semaphore] {
        let old_index = self.present_index;

        // Since this marks the end of this acquire/draw/present cycle, take the
        // opportunity to reset `self` in preparation for the next acquisition.
        self.present_index = 0;
        self.should_wait_for_acquire = true;

        &self.present[0..old_index]
    }

    unsafe fn destroy(&self, device: &ash::Device) {
        unsafe {
            device.destroy_semaphore(self.acquire, None);
            for sem in &self.present {
                device.destroy_semaphore(*sem, None);
            }
        }
    }
}

struct Swapchain {
    raw: vk::SwapchainKHR,
    raw_flags: vk::SwapchainCreateFlagsKHR,
    functor: khr::swapchain::Device,
    device: Arc<DeviceShared>,
    images: Vec<vk::Image>,
    config: crate::SurfaceConfiguration,
    view_formats: Vec<wgt::TextureFormat>,
    /// One wait semaphore per swapchain image. This will be associated with the
    /// surface texture, and later collected during submission.
    ///
    /// We need this to be `Arc<Mutex<>>` because we need to be able to pass this
    /// data into the surface texture, so submit/present can use it.
    surface_semaphores: Vec<Arc<Mutex<SwapchainImageSemaphores>>>,
    /// The index of the next semaphore to use. Ideally, we would use the same
    /// index as the image index, but we need to specify the semaphore as an
    /// argument to the acquire_next_image function, which is what tells us
    /// which image to use.
    next_semaphore_index: usize,
    /// The present timing information which will be set in the next call to [`present()`](crate::Queue::present()).
    ///
    /// # Safety
    ///
    /// This must only be set if [`wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING`] is enabled, and
    /// so the VK_GOOGLE_display_timing extension is present.
    next_present_time: Option<vk::PresentTimeGOOGLE>,
}

impl Swapchain {
    fn advance_surface_semaphores(&mut self) {
        let semaphore_count = self.surface_semaphores.len();
        self.next_semaphore_index = (self.next_semaphore_index + 1) % semaphore_count;
    }

    fn get_surface_semaphores(&self) -> Arc<Mutex<SwapchainImageSemaphores>> {
        self.surface_semaphores[self.next_semaphore_index].clone()
    }
}

pub struct Surface {
    raw: vk::SurfaceKHR,
    functor: khr::surface::Instance,
    instance: Arc<InstanceShared>,
    swapchain: RwLock<Option<Swapchain>>,
}

impl Surface {
    /// Get the raw Vulkan swapchain associated with this surface.
    ///
    /// Returns [`None`] if the surface is not configured.
    pub fn raw_swapchain(&self) -> Option<vk::SwapchainKHR> {
        let read = self.swapchain.read();
        read.as_ref().map(|it| it.raw)
    }

    /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
    /// using [VK_GOOGLE_display_timing].
    ///
    /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
    /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
    ///
    /// This can also be used to add a "not before" timestamp to the presentation.
    ///
    /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
    ///
    /// # Panics
    ///
    /// - If the surface hasn't been configured.
    /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
    ///
    /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
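    ///
    /// A hedged usage sketch (`surface` is this `Surface`, already configured;
    /// the `present_id` value is an arbitrary application-chosen identifier):
    ///
    /// ```skip
    /// surface.set_next_present_time(vk::PresentTimeGOOGLE {
    ///     present_id: 1,
    ///     // 0 means no "not before" constraint; otherwise an absolute
    ///     // timestamp in nanoseconds.
    ///     desired_present_time: 0,
    /// });
    /// ```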
    #[track_caller]
    pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
        let mut swapchain = self.swapchain.write();
        let swapchain = swapchain
            .as_mut()
            .expect("Surface should have been configured");
        let features = wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING;
        if swapchain.device.features.contains(features) {
            swapchain.next_present_time = Some(present_timing);
        } else {
            // Ideally we'd use something like `device.required_features` here,
            // but that's in `wgpu-core`, which depends on us.
            panic!(
                concat!(
                    "Tried to set display timing properties ",
                    "without the corresponding feature ({:?}) enabled."
                ),
                features
            );
        }
    }
}

#[derive(Debug)]
pub struct SurfaceTexture {
    index: u32,
    texture: Texture,
    surface_semaphores: Arc<Mutex<SwapchainImageSemaphores>>,
}

impl crate::DynSurfaceTexture for SurfaceTexture {}

impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}

pub struct Adapter {
    raw: vk::PhysicalDevice,
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    known_memory_flags: vk::MemoryPropertyFlags,
    phd_capabilities: adapter::PhysicalDeviceProperties,
    //phd_features: adapter::PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
}

// TODO: there's no reason why this can't be unified; the function pointers
// should all be the same, but it's not clear how to do this with `ash`.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the functions
    /// on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}

struct DeviceExtensionFunctions {
    debug_utils: Option<ext::debug_utils::Device>,
    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
}

struct RayTracingDeviceExtensionFunctions {
    acceleration_structure: khr::acceleration_structure::Device,
    buffer_device_address: khr::buffer_device_address::Device,
}

/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    /// Y-flipping is implemented with either `VK_AMD_negative_viewport_height`
    /// or `VK_KHR_maintenance1`/1.1+. The AMD extension for negative viewport
    /// height does not require a Y shift.
    ///
    /// This flag is `true` if the device has `VK_KHR_maintenance1`/1.1+ and
    /// `false` otherwise (i.e. in the case of `VK_AMD_negative_viewport_height`).
    flip_y_requires_shift: bool,
    imageless_framebuffers: bool,
    image_view_usage: bool,
    timeline_semaphores: bool,
    texture_d24: bool,
    texture_d24_s8: bool,
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around
    /// broken platform configurations.
    can_present: bool,
    non_coherent_map_mask: wgt::BufferAddress,

    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
    ///
    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
    /// a given bindgroup binding outside that binding's [accessible
    /// region][ar]. Enabling `robustBufferAccess` does ensure that
    /// out-of-bounds reads and writes are not undefined behavior (that's good),
    /// but still permits out-of-bounds reads to return data from anywhere
    /// within the buffer, not just the accessible region.
    ///
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
    robust_buffer_access: bool,

    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    robust_buffer_access2: bool,

    robust_image_access2: bool,
    zero_initialize_workgroup_memory: bool,
    image_format_list: bool,
    maximum_samplers: u32,
}

bitflags::bitflags!(
    /// Workaround flags.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then nvidia will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///     # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);

#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    ops: crate::AttachmentOps,
}

impl AttachmentKey {
    /// Returns an attachment key for a compatible attachment.
    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
        Self {
            format,
            layout,
            ops: crate::AttachmentOps::all(),
        }
    }
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    base: AttachmentKey,
    resolve: Option<AttachmentKey>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    base: AttachmentKey,
    stencil_ops: crate::AttachmentOps,
}

#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    depth_stencil: Option<DepthStencilAttachmentKey>,
    sample_count: u32,
    multiview: Option<NonZeroU32>,
}

#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct FramebufferAttachment {
    /// Can be NULL if the framebuffer is image-less
    raw: vk::ImageView,
    raw_image_flags: vk::ImageCreateFlags,
    view_usage: crate::TextureUses,
    view_format: wgt::TextureFormat,
    raw_view_formats: Vec<vk::Format>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    attachments: ArrayVec<FramebufferAttachment, { MAX_TOTAL_ATTACHMENTS }>,
    extent: wgt::Extent3d,
    sample_count: u32,
}

struct DeviceShared {
    raw: ash::Device,
    family_index: u32,
    queue_index: u32,
    raw_queue: vk::Queue,
    drop_guard: Option<crate::DropGuard>,
    instance: Arc<InstanceShared>,
    physical_device: vk::PhysicalDevice,
    enabled_extensions: Vec<&'static CStr>,
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    pipeline_cache_validation_key: [u8; 16],
    timestamp_period: f32,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
    features: wgt::Features,
    render_passes: Mutex<rustc_hash::FxHashMap<RenderPassKey, vk::RenderPass>>,
    framebuffers: Mutex<rustc_hash::FxHashMap<FramebufferKey, vk::Framebuffer>>,
    sampler_cache: Mutex<sampler::SamplerCache>,
    memory_allocations_counter: InternalCounter,
}

impl Drop for DeviceShared {
    fn drop(&mut self) {
        for &raw in self.render_passes.lock().values() {
            unsafe { self.raw.destroy_render_pass(raw, None) };
        }
        for &raw in self.framebuffers.lock().values() {
            unsafe { self.raw.destroy_framebuffer(raw, None) };
        }
        if self.drop_guard.is_none() {
            unsafe { self.raw.destroy_device(None) };
        }
    }
}

pub struct Device {
    shared: Arc<DeviceShared>,
    mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    valid_ash_memory_types: u32,
    naga_options: naga::back::spv::Options<'static>,
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    counters: Arc<wgt::HalCounters>,
}

impl Drop for Device {
    fn drop(&mut self) {
        unsafe { self.mem_allocator.lock().cleanup(&*self.shared) };
        unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
    }
}

/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
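///
/// A sketch of the resulting relay across three hypothetical submissions,
/// alternating between two semaphores `A` and `B`:
///
/// ```skip
/// // submission 0: wait on nothing, signal A
/// // submission 1: wait on A,       signal B
/// // submission 2: wait on B,       signal A
/// ```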
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}

impl RelaySemaphores {
    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        Ok(Self {
            wait: None,
            signal: device.new_binary_semaphore()?,
        })
    }

    /// Advances the semaphores, returning the semaphores that should be used for a submission.
    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        let old = self.clone();

        // Build the state for the next submission.
        match self.wait {
            None => {
                // The `old` values describe the first submission to this queue.
                // The second submission should wait on `old.signal`, and then
                // signal a new semaphore which we'll create now.
                self.wait = Some(old.signal);
                self.signal = device.new_binary_semaphore()?;
            }
            Some(ref mut wait) => {
                // What this submission signals, the next should wait.
                mem::swap(wait, &mut self.signal);
            }
        };

        Ok(old)
    }

    /// Destroys the semaphores.
    unsafe fn destroy(&self, device: &ash::Device) {
        unsafe {
            if let Some(wait) = self.wait {
                device.destroy_semaphore(wait, None);
            }
            device.destroy_semaphore(self.signal, None);
        }
    }
}

pub struct Queue {
    raw: vk::Queue,
    swapchain_fn: khr::swapchain::Device,
    device: Arc<DeviceShared>,
    family_index: u32,
    relay_semaphores: Mutex<RelaySemaphores>,
}

impl Drop for Queue {
    fn drop(&mut self) {
        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
    }
}

#[derive(Debug)]
pub struct Buffer {
    raw: vk::Buffer,
    block: Option<Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>>,
}

impl crate::DynBuffer for Buffer {}

#[derive(Debug)]
pub struct AccelerationStructure {
    raw: vk::AccelerationStructureKHR,
    buffer: vk::Buffer,
    block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
}

impl crate::DynAccelerationStructure for AccelerationStructure {}

#[derive(Debug)]
pub struct Texture {
    raw: vk::Image,
    drop_guard: Option<crate::DropGuard>,
    external_memory: Option<vk::DeviceMemory>,
    block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    usage: crate::TextureUses,
    format: wgt::TextureFormat,
    raw_flags: vk::ImageCreateFlags,
    copy_size: crate::CopyExtent,
    view_formats: Vec<wgt::TextureFormat>,
}

impl crate::DynTexture for Texture {}

impl Texture {
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }
}

#[derive(Debug)]
pub struct TextureView {
    raw: vk::ImageView,
    layers: NonZeroU32,
    attachment: FramebufferAttachment,
}

impl crate::DynTextureView for TextureView {}

impl TextureView {
    /// # Safety
    ///
    /// - The image view handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::ImageView {
        self.raw
    }
}

#[derive(Debug)]
pub struct Sampler {
    raw: vk::Sampler,
    create_info: vk::SamplerCreateInfo<'static>,
}

impl crate::DynSampler for Sampler {}

#[derive(Debug)]
pub struct BindGroupLayout {
    raw: vk::DescriptorSetLayout,
    desc_count: gpu_descriptor::DescriptorTotalCount,
    types: Box<[(vk::DescriptorType, u32)]>,
    /// Map of binding index to size.
    binding_arrays: Vec<(u32, NonZeroU32)>,
}

impl crate::DynBindGroupLayout for BindGroupLayout {}

#[derive(Debug)]
pub struct PipelineLayout {
    raw: vk::PipelineLayout,
    binding_arrays: naga::back::spv::BindingMap,
}

impl crate::DynPipelineLayout for PipelineLayout {}

#[derive(Debug)]
pub struct BindGroup {
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

impl crate::DynBindGroup for BindGroup {}

/// Miscellaneous allocation recycling pool for `CommandAllocator`.
#[derive(Default)]
struct Temp {
    marker: Vec<u8>,
    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
}

impl Temp {
    fn clear(&mut self) {
        self.marker.clear();
        self.buffer_barriers.clear();
        self.image_barriers.clear();
    }

    fn make_c_str(&mut self, name: &str) -> &CStr {
        self.marker.clear();
        self.marker.extend_from_slice(name.as_bytes());
        self.marker.push(0);
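        // SAFETY (assumption): `marker` now holds `name`'s bytes followed by
        // exactly one NUL terminator; callers pass label strings that contain
        // no interior NUL bytes.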
        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
    }
}

pub struct CommandEncoder {
    raw: vk::CommandPool,
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    counters: Arc<wgt::HalCounters>,
}

impl Drop for CommandEncoder {
    fn drop(&mut self) {
        // SAFETY:
        //
        // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
        // `CommandBuffer` must live until its execution is complete, and that a
        // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
        // Thus, we know that none of our `CommandBuffers` are in the "pending"
        // state.
        //
        // The other VUIDs are pretty obvious.
        unsafe {
            // `vkDestroyCommandPool` also frees any command buffers allocated
            // from that pool, so there's no need to explicitly call
            // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
            // fields.
            self.device.raw.destroy_command_pool(self.raw, None);
        }
        self.counters.command_encoders.sub(1);
    }
}

impl CommandEncoder {
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}

impl fmt::Debug for CommandEncoder {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("CommandEncoder")
            .field("raw", &self.raw)
            .finish()
    }
}

#[derive(Debug)]
pub struct CommandBuffer {
    raw: vk::CommandBuffer,
}

impl crate::DynCommandBuffer for CommandBuffer {}

#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum ShaderModule {
    Raw(vk::ShaderModule),
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

impl crate::DynShaderModule for ShaderModule {}

#[derive(Debug)]
pub struct RenderPipeline {
    raw: vk::Pipeline,
}

impl crate::DynRenderPipeline for RenderPipeline {}

#[derive(Debug)]
pub struct ComputePipeline {
    raw: vk::Pipeline,
}

impl crate::DynComputePipeline for ComputePipeline {}

#[derive(Debug)]
pub struct PipelineCache {
    raw: vk::PipelineCache,
}

impl crate::DynPipelineCache for PipelineCache {}

#[derive(Debug)]
pub struct QuerySet {
    raw: vk::QueryPool,
}

impl crate::DynQuerySet for QuerySet {}

/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
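    ///
    /// A worked example with hypothetical values:
    ///
    /// ```skip
    /// // last_completed == 2, active == [(3, a), (4, b)]
    /// // fence `a` is signaled, fence `b` is not:
    /// //   effective FenceValue == 3
    /// // `maintain` then moves `a` into `free` and sets last_completed = 3.
    /// ```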
    FencePool {
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        free: Vec<vk::Fence>,
    },
}

impl crate::DynFence for Fence {}

impl Fence {
    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
    ///
    /// As an optimization, assume that we already know that the fence has
    /// reached `last_completed`, and don't bother checking fences whose values
    /// are less than that: those fences remain in the `active` array only
    /// because we haven't called `maintain` yet to clean them up.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn check_active(
        device: &ash::Device,
        mut last_completed: crate::FenceValue,
        active: &[(crate::FenceValue, vk::Fence)],
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        for &(value, raw) in active.iter() {
            unsafe {
                if value > last_completed
                    && device
                        .get_fence_status(raw)
                        .map_err(map_host_device_oom_and_lost_err)?
                {
                    last_completed = value;
                }
            }
        }
        Ok(last_completed)
    }

    /// Return the highest signalled [`FenceValue`] for `self`.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn get_latest(
        &self,
        device: &ash::Device,
        extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(raw) => unsafe {
                Ok(match *extension.unwrap() {
                    ExtensionFn::Extension(ref ext) => ext
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                    ExtensionFn::Promoted => device
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                })
            },
            Self::FencePool {
                last_completed,
                ref active,
                free: _,
            } => Self::check_active(device, last_completed, active),
        }
    }

    /// Trim the internal state of this [`Fence`].
    ///
    /// This function has no externally visible effect, but you should call it
    /// periodically to keep this fence's resource consumption under control.
    ///
    /// For fences using the [`FencePool`] implementation, this function
    /// recycles fences that have been signaled. If you don't call this,
    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
    /// time it's called.
    ///
    /// [`FencePool`]: Fence::FencePool
    /// [`Queue::submit`]: crate::Queue::submit
    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(_) => {}
            Self::FencePool {
                ref mut last_completed,
                ref mut active,
                ref mut free,
            } => {
                let latest = Self::check_active(device, *last_completed, active)?;
                let base_free = free.len();
                for &(value, raw) in active.iter() {
                    if value <= latest {
                        free.push(raw);
                    }
                }
                if free.len() != base_free {
                    active.retain(|&(value, _)| value > latest);
                    unsafe { device.reset_fences(&free[base_free..]) }
                        .map_err(map_device_oom_err)?
                }
                *last_completed = latest;
            }
        }
        Ok(())
    }
}

impl crate::Queue for Queue {
    type A = Api;

    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        let mut fence_raw = vk::Fence::null();

        let mut wait_stage_masks = Vec::new();
        let mut wait_semaphores = Vec::new();
        let mut signal_semaphores = Vec::new();
        let mut signal_values = Vec::new();

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Arcs by pointer, as Eq isn't well defined for SurfaceSemaphores.
                for st in surface_textures {
                    check.insert(Arc::as_ptr(&st.surface_semaphores));
                }
                check.len() == surface_textures.len()
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release builds."
        );

        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| {
                st.surface_semaphores
                    .try_lock()
                    .expect("Failed to lock surface semaphore.")
            })
            .collect::<Vec<_>>();

        for mut swapchain_semaphore in locked_swapchain_semaphores {
            swapchain_semaphore.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = swapchain_semaphore.get_acquire_wait_semaphore() {
                wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
                wait_semaphores.push(sem);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = swapchain_semaphore.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push(signal_semaphore);
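            // Binary semaphores ignore the timeline value, but `signal_values`
            // must stay the same length as `signal_semaphores`, so push a
            // placeholder.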
            signal_values.push(!0);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
            wait_semaphores.push(sem);
        }

        signal_semaphores.push(semaphore_state.signal);
        signal_values.push(!0);

        // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push(raw);
                signal_values.push(signal_value);
            }
            Fence::FencePool {
                ref mut active,
                ref mut free,
                ..
            } => {
                fence_raw = match free.pop() {
                    Some(raw) => raw,
                    None => unsafe {
                        self.device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?
                    },
                };
                active.push((signal_value, fence_raw));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);

        vk_info = vk_info
            .wait_semaphores(&wait_semaphores)
            .wait_dst_stage_mask(&wait_stage_masks)
            .signal_semaphores(&signal_semaphores);

        let mut vk_timeline_info;

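        // The timeline semaphore values ride along in an extension struct;
        // chain it in only when the device supports timeline semaphores.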
        if self.device.private_caps.timeline_semaphores {
            vk_timeline_info =
                vk::TimelineSemaphoreSubmitInfo::default().signal_semaphore_values(&signal_values);
            vk_info = vk_info.push_next(&mut vk_timeline_info);
        }

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();
        let ssc = swapchain.as_mut().unwrap();
        let mut swapchain_semaphores = texture.surface_semaphores.lock();

        let swapchains = [ssc.raw];
        let image_indices = [texture.index];
        let vk_info = vk::PresentInfoKHR::default()
            .swapchains(&swapchains)
            .image_indices(&image_indices)
            .wait_semaphores(swapchain_semaphores.get_present_wait_semaphores());

        let mut display_timing;
        let present_times;
        let vk_info = if let Some(present_time) = ssc.next_present_time.take() {
            debug_assert!(
                ssc.device
                    .features
                    .contains(wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING),
                "`next_present_time` should only be set if `VULKAN_GOOGLE_DISPLAY_TIMING` is enabled"
            );
            present_times = [present_time];
            display_timing = vk::PresentTimesInfoGOOGLE::default().times(&present_times);
            // SAFETY: We know that VK_GOOGLE_display_timing is present because of the
            // safety contract on `next_present_time`.
            vk_info.push_next(&mut display_timing)
        } else {
            vk_info
        };

        let suboptimal = {
            profiling::scope!("vkQueuePresentKHR");
            unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| {
                match error {
                    vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
                    vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
                    // We don't use VK_EXT_full_screen_exclusive
                    // VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT
                    _ => map_host_device_oom_and_lost_err(error).into(),
                }
            })?
        };
        if suboptimal {
            // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
            // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns
            // `VK_SUBOPTIMAL_KHR` if not doing pre-rotation (i.e.
            // `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current
            // device orientation). This is always the case when the device orientation
            // is anything other than the identity one, as we unconditionally use
            // `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`.
            #[cfg(not(target_os = "android"))]
            log::warn!("Suboptimal present of frame {}", texture.index);
        }
        Ok(())
    }

    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
            get_oom_err(err)
        }
        e => get_unexpected_err(e),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_DEVICE_LOST
fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
        other => map_host_device_oom_err(other),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_device_oom_err(err)
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
        e => get_unexpected_err(e),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
        e => get_unexpected_err(e),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_oom_err(err)
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control
    // VK_PIPELINE_COMPILE_REQUIRED_EXT
    // We don't use VK_NV_glsl_shader
    // VK_ERROR_INVALID_SHADER_NV
    map_host_device_oom_err(err)
}

/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
/// feature flag is enabled.
fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
    #[cfg(feature = "internal_error_panic")]
    panic!("Unexpected Vulkan error: {_err:?}");

    #[allow(unreachable_code)]
    crate::DeviceError::Unexpected
}

/// Returns [`crate::DeviceError::OutOfMemory`] or panics if the `oom_panic`
/// feature flag is enabled.
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    #[cfg(feature = "oom_panic")]
    panic!("Out of memory ({_err:?})");

    #[allow(unreachable_code)]
    crate::DeviceError::OutOfMemory
}

/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
/// feature flag is enabled.
fn get_lost_err() -> crate::DeviceError {
    #[cfg(feature = "device_lost_panic")]
    panic!("Device lost");

    #[allow(unreachable_code)]
    crate::DeviceError::Lost
}
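
/// Matches the layout of `VkAccelerationStructureInstanceKHR`: a 3x4 row-major
/// transform, a packed 24-bit custom index + 8-bit visibility mask, a packed
/// 24-bit SBT record offset + 8-bit flags, and the device address of the
/// acceleration structure, so values can be written directly into instance
/// buffers for top-level acceleration structure builds.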
#[derive(Clone)]
#[repr(C)]
struct RawTlasInstance {
    transform: [f32; 12],
    custom_index_and_mask: u32,
    shader_binding_table_record_offset_and_flags: u32,
    acceleration_structure_reference: u64,
}