wgpu_hal/vulkan/
mod.rs

1/*!
2# Vulkan API internals.
3
4## Stack memory
5
6Ash expects slices, which we don't generally have available.
We cope with this requirement by a combination of the following means:
8  - temporarily allocating `Vec` on heap, where overhead is permitted
9  - growing temporary local storage
10
11## Framebuffers and Render passes
12
13Render passes are cached on the device and kept forever.
14
Framebuffers are also cached on the device, and a cached framebuffer is
removed whenever any of the image views it contains is destroyed.
17If Vulkan supports image-less framebuffers,
18then the actual views are excluded from the framebuffer key.
19
20## Fences
21
22If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
23Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
24
25!*/
26
27mod adapter;
28mod command;
29mod conv;
30mod device;
31mod instance;
32mod sampler;
33
34use std::{
35    borrow::Borrow,
36    collections::HashSet,
37    ffi::{CStr, CString},
38    fmt, mem,
39    num::NonZeroU32,
40    sync::Arc,
41};
42
43use arrayvec::ArrayVec;
44use ash::{ext, khr, vk};
45use parking_lot::{Mutex, RwLock};
46use wgt::InternalCounter;
47
/// Nanoseconds per millisecond, for converting millisecond timeouts to Vulkan's nanosecond units.
const MILLIS_TO_NANOS: u64 = 1_000_000;
/// Upper bound on attachments in one framebuffer: each color attachment may
/// carry a resolve target (`* 2`), plus one depth/stencil attachment (`+ 1`).
const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;
50
/// Marker type implementing [`crate::Api`] for the Vulkan backend.
#[derive(Clone, Debug)]
pub struct Api;
53
// Wire the backend into `wgpu-hal`: each `crate::Api` associated type is
// mapped to this module's concrete Vulkan wrapper type.
impl crate::Api for Api {
    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;
    type PipelineCache = PipelineCache;

    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}
81
// Implement the type-erasure (`Dyn*`) plumbing for every backend type above,
// so they can be used through `wgpu-hal`'s dynamic-dispatch interface.
crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);
106
/// State for the `VK_EXT_debug_utils` messenger installed on the instance.
struct DebugUtils {
    /// Loaded entry points for the `VK_EXT_debug_utils` instance extension.
    extension: ext::debug_utils::Instance,
    /// The registered messenger handle.
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}
119
/// Parameters used to create the instance's debug messenger.
pub struct DebugUtilsCreateInfo {
    /// Message severities the callback should receive.
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    /// Message categories the callback should receive.
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    /// User data handed to the messenger callback (see [`DebugUtilsMessengerUserData`]).
    callback_data: Box<DebugUtilsMessengerUserData>,
}
125
/// The properties related to the validation layer needed for the
/// DebugUtilsMessenger for their workarounds.
#[derive(Debug)]
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}
136
/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties related to the validation layer, if present.
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// If the OBS layer is present. OBS never increments the version of their layer,
    /// so there's no reason to have the version.
    has_obs_layer: bool,
}
150
/// Instance-level state shared between [`Instance`] and objects created from it.
pub struct InstanceShared {
    raw: ash::Instance,
    /// Instance extensions enabled at creation time.
    extensions: Vec<&'static CStr>,
    // NOTE(review): presumably guards an externally owned `raw` handle,
    // mirroring `DeviceShared::drop_guard` — confirm against `InstanceShared`'s
    // drop logic (not visible in this chunk).
    drop_guard: Option<crate::DropGuard>,
    flags: wgt::InstanceFlags,
    /// Debug messenger state, present when debug utils are enabled.
    debug_utils: Option<DebugUtils>,
    /// `VK_KHR_get_physical_device_properties2` entry points, if available.
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    entry: ash::Entry,
    has_nv_optimus: bool,
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// Which is the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,
}
169
/// The Vulkan backend's [`crate::Api::Instance`] type; a shared handle to [`InstanceShared`].
pub struct Instance {
    shared: Arc<InstanceShared>,
}
173
/// The semaphores needed to use one image in a swapchain.
///
/// A [`Swapchain`] holds one of these per swapchain image; see
/// [`Swapchain::surface_semaphores`].
#[derive(Debug)]
struct SwapchainImageSemaphores {
    /// A semaphore that is signaled when this image is safe for us to modify.
    ///
    /// When [`vkAcquireNextImageKHR`] returns the index of the next swapchain
    /// image that we should use, that image may actually still be in use by the
    /// presentation engine, and is not yet safe to modify. However, that
    /// function does accept a semaphore that it will signal when the image is
    /// indeed safe to begin messing with.
    ///
    /// This semaphore is:
    ///
    /// - waited for by the first queue submission to operate on this image
    ///   since it was acquired, and
    ///
    /// - signaled by [`vkAcquireNextImageKHR`] when the acquired image is ready
    ///   for us to use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    acquire: vk::Semaphore,

    /// True if the next command submission operating on this image should wait
    /// for [`acquire`].
    ///
    /// We must wait for `acquire` before drawing to this swapchain image, but
    /// because `wgpu-hal` queue submissions are always strongly ordered, only
    /// the first submission that works with a swapchain image actually needs to
    /// wait. We set this flag when this image is acquired, and clear it the
    /// first time it's passed to [`Queue::submit`] as a surface texture.
    ///
    /// [`acquire`]: SwapchainImageSemaphores::acquire
    /// [`Queue::submit`]: crate::Queue::submit
    should_wait_for_acquire: bool,

    /// A pool of semaphores for ordering presentation after drawing.
    ///
    /// The first [`present_index`] semaphores in this vector are:
    ///
    /// - all waited on by the call to [`vkQueuePresentKHR`] that presents this
    ///   image, and
    ///
    /// - each signaled by some [`vkQueueSubmit`] queue submission that draws to
    ///   this image, when the submission finishes execution.
    ///
    /// This vector accumulates one semaphore per submission that writes to this
    /// image. This is awkward, but hard to avoid: [`vkQueuePresentKHR`]
    /// requires a semaphore to order it with respect to drawing commands, and
    /// we can't attach new completion semaphores to a command submission after
    /// it's been submitted. This means that, at submission time, we must create
    /// the semaphore we might need if the caller's next action is to enqueue a
    /// presentation of this image.
    ///
    /// An alternative strategy would be for presentation to enqueue an empty
    /// submit, ordered relative to other submits in the usual way, and
    /// signaling a single presentation semaphore. But we suspect that submits
    /// are usually expensive enough, and semaphores usually cheap enough, that
    /// performance-sensitive users will avoid making many submits, so that the
    /// cost of accumulated semaphores will usually be less than the cost of an
    /// additional submit.
    ///
    /// Only the first [`present_index`] semaphores in the vector are actually
    /// going to be signalled by submitted commands, and need to be waited for
    /// by the next present call. Any semaphores beyond that index were created
    /// for prior presents and are simply being retained for recycling.
    ///
    /// [`present_index`]: SwapchainImageSemaphores::present_index
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
    present: Vec<vk::Semaphore>,

    /// The number of semaphores in [`present`] to be signalled for this submission.
    ///
    /// [`present`]: SwapchainImageSemaphores::present
    present_index: usize,

    /// The fence value of the last command submission that wrote to this image.
    ///
    /// The next time we try to acquire this image, we'll block until
    /// this submission finishes, proving that [`acquire`] is ready to
    /// pass to `vkAcquireNextImageKHR` again.
    ///
    /// [`acquire`]: SwapchainImageSemaphores::acquire
    previously_used_submission_index: crate::FenceValue,
}
259
260impl SwapchainImageSemaphores {
261    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
262        Ok(Self {
263            acquire: device.new_binary_semaphore()?,
264            should_wait_for_acquire: true,
265            present: Vec::new(),
266            present_index: 0,
267            previously_used_submission_index: 0,
268        })
269    }
270
271    fn set_used_fence_value(&mut self, value: crate::FenceValue) {
272        self.previously_used_submission_index = value;
273    }
274
275    /// Return the semaphore that commands drawing to this image should wait for, if any.
276    ///
277    /// This only returns `Some` once per acquisition; see
278    /// [`SwapchainImageSemaphores::should_wait_for_acquire`] for details.
279    fn get_acquire_wait_semaphore(&mut self) -> Option<vk::Semaphore> {
280        if self.should_wait_for_acquire {
281            self.should_wait_for_acquire = false;
282            Some(self.acquire)
283        } else {
284            None
285        }
286    }
287
288    /// Return a semaphore that a submission that writes to this image should
289    /// signal when it's done.
290    ///
291    /// See [`SwapchainImageSemaphores::present`] for details.
292    fn get_submit_signal_semaphore(
293        &mut self,
294        device: &DeviceShared,
295    ) -> Result<vk::Semaphore, crate::DeviceError> {
296        // Try to recycle a semaphore we created for a previous presentation.
297        let sem = match self.present.get(self.present_index) {
298            Some(sem) => *sem,
299            None => {
300                let sem = device.new_binary_semaphore()?;
301                self.present.push(sem);
302                sem
303            }
304        };
305
306        self.present_index += 1;
307
308        Ok(sem)
309    }
310
311    /// Return the semaphores that a presentation of this image should wait on.
312    ///
313    /// Return a slice of semaphores that the call to [`vkQueueSubmit`] that
314    /// ends this image's acquisition should wait for. See
315    /// [`SwapchainImageSemaphores::present`] for details.
316    ///
317    /// Reset `self` to be ready for the next acquisition cycle.
318    ///
319    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
320    fn get_present_wait_semaphores(&mut self) -> &[vk::Semaphore] {
321        let old_index = self.present_index;
322
323        // Since this marks the end of this acquire/draw/present cycle, take the
324        // opportunity to reset `self` in preparation for the next acquisition.
325        self.present_index = 0;
326        self.should_wait_for_acquire = true;
327
328        &self.present[0..old_index]
329    }
330
331    unsafe fn destroy(&self, device: &ash::Device) {
332        unsafe {
333            device.destroy_semaphore(self.acquire, None);
334            for sem in &self.present {
335                device.destroy_semaphore(*sem, None);
336            }
337        }
338    }
339}
340
/// An active `VkSwapchainKHR` and the state needed to draw to and present it.
struct Swapchain {
    raw: vk::SwapchainKHR,
    raw_flags: vk::SwapchainCreateFlagsKHR,
    /// Loaded `VK_KHR_swapchain` device entry points.
    functor: khr::swapchain::Device,
    device: Arc<DeviceShared>,
    images: Vec<vk::Image>,
    config: crate::SurfaceConfiguration,
    view_formats: Vec<wgt::TextureFormat>,
    /// One wait semaphore per swapchain image. This will be associated with the
    /// surface texture, and later collected during submission.
    ///
    /// We need this to be `Arc<Mutex<>>` because we need to be able to pass this
    /// data into the surface texture, so submit/present can use it.
    surface_semaphores: Vec<Arc<Mutex<SwapchainImageSemaphores>>>,
    /// The index of the next semaphore to use. Ideally we would use the same
    /// index as the image index, but we need to specify the semaphore as an argument
    /// to the acquire_next_image function which is what tells us which image to use.
    next_semaphore_index: usize,
    /// The present timing information which will be set in the next call to [`present()`](crate::Queue::present()).
    ///
    /// # Safety
    ///
    /// This must only be set if [`wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING`] is enabled, and
    /// so the VK_GOOGLE_display_timing extension is present.
    next_present_time: Option<vk::PresentTimeGOOGLE>,
}
367
368impl Swapchain {
369    fn advance_surface_semaphores(&mut self) {
370        let semaphore_count = self.surface_semaphores.len();
371        self.next_semaphore_index = (self.next_semaphore_index + 1) % semaphore_count;
372    }
373
374    fn get_surface_semaphores(&self) -> Arc<Mutex<SwapchainImageSemaphores>> {
375        self.surface_semaphores[self.next_semaphore_index].clone()
376    }
377}
378
/// A presentable Vulkan surface (`VkSurfaceKHR`) and its current swapchain.
pub struct Surface {
    raw: vk::SurfaceKHR,
    /// Loaded `VK_KHR_surface` instance entry points.
    functor: khr::surface::Instance,
    instance: Arc<InstanceShared>,
    /// `None` until the surface has been configured.
    swapchain: RwLock<Option<Swapchain>>,
}
385
386impl Surface {
387    /// Get the raw Vulkan swapchain associated with this surface.
388    ///
389    /// Returns [`None`] if the surface is not configured.
390    pub fn raw_swapchain(&self) -> Option<vk::SwapchainKHR> {
391        let read = self.swapchain.read();
392        read.as_ref().map(|it| it.raw)
393    }
394
395    /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
396    /// using [VK_GOOGLE_display_timing].
397    ///
398    /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
399    /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
400    ///
401    /// This can also be used to add a "not before" timestamp to the presentation.
402    ///
403    /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
404    ///
405    /// # Panics
406    ///
407    /// - If the surface hasn't been configured.
408    /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
409    ///
410    /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
411    #[track_caller]
412    pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
413        let mut swapchain = self.swapchain.write();
414        let swapchain = swapchain
415            .as_mut()
416            .expect("Surface should have been configured");
417        let features = wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING;
418        if swapchain.device.features.contains(features) {
419            swapchain.next_present_time = Some(present_timing);
420        } else {
421            // Ideally we'd use something like `device.required_features` here, but that's in `wgpu-core`, which we are a dependency of
422            panic!(
423                concat!(
424                    "Tried to set display timing properties ",
425                    "without the corresponding feature ({:?}) enabled."
426                ),
427                features
428            );
429        }
430    }
431}
432
/// A swapchain image handed out by [`Surface`] for one acquire/draw/present cycle.
#[derive(Debug)]
pub struct SurfaceTexture {
    // NOTE(review): presumably the swapchain image index as returned by
    // `vkAcquireNextImageKHR` — confirm against the acquire path.
    index: u32,
    texture: Texture,
    /// Shared with the owning [`Swapchain`], so submit/present can use it.
    surface_semaphores: Arc<Mutex<SwapchainImageSemaphores>>,
}
439
impl crate::DynSurfaceTexture for SurfaceTexture {}

// Allow a `SurfaceTexture` to be used wherever a plain `Texture` is expected.
impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

// Same, for the type-erased texture trait.
impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}
453
/// A Vulkan physical device plus the capability information derived from it.
pub struct Adapter {
    raw: vk::PhysicalDevice,
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    known_memory_flags: vk::MemoryPropertyFlags,
    phd_capabilities: adapter::PhysicalDeviceProperties,
    //phd_features: adapter::PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
}
465
// TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`.
/// How an extension's entry points are reached: via the extension itself, or
/// via core Vulkan after promotion.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}
473
/// Loaded entry points for the optional device extensions we use.
struct DeviceExtensionFunctions {
    debug_utils: Option<ext::debug_utils::Device>,
    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
}
480
/// Entry points for the extensions required by ray tracing support.
struct RayTracingDeviceExtensionFunctions {
    acceleration_structure: khr::acceleration_structure::Device,
    buffer_device_address: khr::buffer_device_address::Device,
}
485
/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    /// Y-flipping is implemented with either `VK_AMD_negative_viewport_height` or `VK_KHR_maintenance1`/1.1+. The AMD extension for negative viewport height does not require a Y shift.
    ///
    /// This flag is `true` if the device has `VK_KHR_maintenance1`/1.1+ and `false` otherwise (i.e. in the case of `VK_AMD_negative_viewport_height`).
    flip_y_requires_shift: bool,
    /// Whether image-less framebuffers are supported; when true, the actual
    /// image views are excluded from the framebuffer cache key (see module docs).
    imageless_framebuffers: bool,
    image_view_usage: bool,
    /// Whether timeline semaphores are available; if so they are used 1:1 with
    /// `wgpu-hal` fences (see the "Fences" section of the module docs).
    timeline_semaphores: bool,
    texture_d24: bool,
    texture_d24_s8: bool,
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
    can_present: bool,
    non_coherent_map_mask: wgt::BufferAddress,

    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
    ///
    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
    /// a given bindgroup binding outside that binding's [accessible
    /// region][ar]. Enabling `robustBufferAccess` does ensure that
    /// out-of-bounds reads and writes are not undefined behavior (that's good),
    /// but still permits out-of-bounds reads to return data from anywhere
    /// within the buffer, not just the accessible region.
    ///
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
    robust_buffer_access: bool,

    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    robust_buffer_access2: bool,

    robust_image_access2: bool,
    zero_initialize_workgroup_memory: bool,
    image_format_list: bool,
    maximum_samplers: u32,
}
538
bitflags::bitflags!(
    /// Workaround flags.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then NVIDIA will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///   # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);
572
/// Per-attachment component of a [`RenderPassKey`].
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    ops: crate::AttachmentOps,
}
579
580impl AttachmentKey {
581    /// Returns an attachment key for a compatible attachment.
582    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
583        Self {
584            format,
585            layout,
586            ops: crate::AttachmentOps::all(),
587        }
588    }
589}
590
/// Cache key for one color attachment, with its optional resolve target.
#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    base: AttachmentKey,
    resolve: Option<AttachmentKey>,
}
596
/// Cache key for the depth/stencil attachment.
#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    base: AttachmentKey,
    stencil_ops: crate::AttachmentOps,
}
602
/// Key for the device's render-pass cache (see the module docs: render passes
/// are cached on the device and kept forever).
#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    depth_stencil: Option<DepthStencilAttachmentKey>,
    sample_count: u32,
    multiview: Option<NonZeroU32>,
}
610
/// Description of one attachment as it appears in a [`FramebufferKey`].
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct FramebufferAttachment {
    /// Can be `vk::ImageView::null()` if the framebuffer is image-less.
    raw: vk::ImageView,
    raw_image_flags: vk::ImageCreateFlags,
    view_usage: crate::TextureUses,
    view_format: wgt::TextureFormat,
    raw_view_formats: Vec<vk::Format>,
}
620
/// Key for the device's framebuffer cache (see the module docs; with
/// image-less framebuffers the actual views are excluded from the key).
#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    attachments: ArrayVec<FramebufferAttachment, { MAX_TOTAL_ATTACHMENTS }>,
    extent: wgt::Extent3d,
    sample_count: u32,
}
627
/// Device-level state shared by [`Device`], [`Queue`], and child resources.
struct DeviceShared {
    raw: ash::Device,
    family_index: u32,
    queue_index: u32,
    raw_queue: vk::Queue,
    /// When `Some`, `raw` is externally owned and is not destroyed on drop
    /// (see `Drop for DeviceShared`).
    drop_guard: Option<crate::DropGuard>,
    instance: Arc<InstanceShared>,
    physical_device: vk::PhysicalDevice,
    enabled_extensions: Vec<&'static CStr>,
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    pipeline_cache_validation_key: [u8; 16],
    timestamp_period: f32,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
    features: wgt::Features,
    /// Render-pass cache; entries live for the device's lifetime and are
    /// destroyed in `Drop` (see module docs).
    render_passes: Mutex<rustc_hash::FxHashMap<RenderPassKey, vk::RenderPass>>,
    /// Framebuffer cache; entries are destroyed in `Drop` (see module docs).
    framebuffers: Mutex<rustc_hash::FxHashMap<FramebufferKey, vk::Framebuffer>>,
    sampler_cache: Mutex<sampler::SamplerCache>,
    memory_allocations_counter: InternalCounter,
}
649
650impl Drop for DeviceShared {
651    fn drop(&mut self) {
652        for &raw in self.render_passes.lock().values() {
653            unsafe { self.raw.destroy_render_pass(raw, None) };
654        }
655        for &raw in self.framebuffers.lock().values() {
656            unsafe { self.raw.destroy_framebuffer(raw, None) };
657        }
658        if self.drop_guard.is_none() {
659            unsafe { self.raw.destroy_device(None) };
660        }
661    }
662}
663
/// The Vulkan backend's logical device, with its memory and descriptor allocators.
pub struct Device {
    shared: Arc<DeviceShared>,
    mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    /// Bitmask of the memory types usable by this device.
    valid_ash_memory_types: u32,
    naga_options: naga::back::spv::Options<'static>,
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    counters: Arc<wgt::HalCounters>,
}
675
676impl Drop for Device {
677    fn drop(&mut self) {
678        unsafe { self.mem_allocator.lock().cleanup(&*self.shared) };
679        unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
680    }
681}
682
/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}
716
717impl RelaySemaphores {
718    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
719        Ok(Self {
720            wait: None,
721            signal: device.new_binary_semaphore()?,
722        })
723    }
724
725    /// Advances the semaphores, returning the semaphores that should be used for a submission.
726    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
727        let old = self.clone();
728
729        // Build the state for the next submission.
730        match self.wait {
731            None => {
732                // The `old` values describe the first submission to this queue.
733                // The second submission should wait on `old.signal`, and then
734                // signal a new semaphore which we'll create now.
735                self.wait = Some(old.signal);
736                self.signal = device.new_binary_semaphore()?;
737            }
738            Some(ref mut wait) => {
739                // What this submission signals, the next should wait.
740                mem::swap(wait, &mut self.signal);
741            }
742        };
743
744        Ok(old)
745    }
746
747    /// Destroys the semaphores.
748    unsafe fn destroy(&self, device: &ash::Device) {
749        unsafe {
750            if let Some(wait) = self.wait {
751                device.destroy_semaphore(wait, None);
752            }
753            device.destroy_semaphore(self.signal, None);
754        }
755    }
756}
757
/// A Vulkan queue plus the relay semaphores that keep its submissions ordered.
pub struct Queue {
    raw: vk::Queue,
    /// Loaded `VK_KHR_swapchain` device entry points, used for presentation.
    swapchain_fn: khr::swapchain::Device,
    device: Arc<DeviceShared>,
    family_index: u32,
    relay_semaphores: Mutex<RelaySemaphores>,
}
765
766impl Drop for Queue {
767    fn drop(&mut self) {
768        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
769    }
770}
771
/// A Vulkan buffer and (when we allocated it) its backing memory block.
#[derive(Debug)]
pub struct Buffer {
    raw: vk::Buffer,
    /// `None` when the memory is not owned by this buffer.
    block: Option<Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>>,
}

impl crate::DynBuffer for Buffer {}
779
/// A ray-tracing acceleration structure, its backing buffer, and memory block.
#[derive(Debug)]
pub struct AccelerationStructure {
    raw: vk::AccelerationStructureKHR,
    buffer: vk::Buffer,
    block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
}

impl crate::DynAccelerationStructure for AccelerationStructure {}
788
/// A Vulkan image and the metadata needed to create views and copies of it.
#[derive(Debug)]
pub struct Texture {
    raw: vk::Image,
    /// When `Some`, `raw` is externally owned.
    drop_guard: Option<crate::DropGuard>,
    /// Imported (external) memory backing the image, if any.
    external_memory: Option<vk::DeviceMemory>,
    /// Memory block backing the image when we allocated it ourselves.
    block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    usage: crate::TextureUses,
    format: wgt::TextureFormat,
    raw_flags: vk::ImageCreateFlags,
    copy_size: crate::CopyExtent,
    view_formats: Vec<wgt::TextureFormat>,
}

impl crate::DynTexture for Texture {}
803
impl Texture {
    /// Returns the raw `VkImage` handle for interop with `ash`.
    ///
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }
}
812
/// A Vulkan image view plus the attachment info used by the framebuffer cache.
#[derive(Debug)]
pub struct TextureView {
    raw: vk::ImageView,
    layers: NonZeroU32,
    attachment: FramebufferAttachment,
}

impl crate::DynTextureView for TextureView {}
821
impl TextureView {
    /// Returns the raw `VkImageView` handle for interop with `ash`.
    ///
    /// # Safety
    ///
    /// - The image view handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::ImageView {
        self.raw
    }
}
830
/// Vulkan implementation of [`crate::Api::Sampler`].
#[derive(Debug)]
pub struct Sampler {
    raw: vk::Sampler,
    /// The descriptor this sampler was created with.
    /// NOTE(review): presumably retained as a key for the sampler cache —
    /// confirm in the `sampler` module.
    create_info: vk::SamplerCreateInfo<'static>,
}

impl crate::DynSampler for Sampler {}
838
/// Vulkan implementation of [`crate::Api::BindGroupLayout`].
#[derive(Debug)]
pub struct BindGroupLayout {
    raw: vk::DescriptorSetLayout,
    /// Total descriptor counts required to allocate a set with this layout.
    desc_count: gpu_descriptor::DescriptorTotalCount,
    /// Descriptor type and count for each binding in this layout.
    types: Box<[(vk::DescriptorType, u32)]>,
    /// Map of binding index to binding-array size.
    binding_arrays: Vec<(u32, NonZeroU32)>,
}

impl crate::DynBindGroupLayout for BindGroupLayout {}
849
/// Vulkan implementation of [`crate::Api::PipelineLayout`].
#[derive(Debug)]
pub struct PipelineLayout {
    raw: vk::PipelineLayout,
    /// Binding map for Naga's SPIR-V backend.
    /// NOTE(review): presumably fed to shader translation when compiling
    /// pipelines against this layout — confirm in `device.rs`.
    binding_arrays: naga::back::spv::BindingMap,
}

impl crate::DynPipelineLayout for PipelineLayout {}
857
/// Vulkan implementation of [`crate::Api::BindGroup`]: a descriptor set
/// allocated from a `gpu_descriptor` pool.
#[derive(Debug)]
pub struct BindGroup {
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

impl crate::DynBindGroup for BindGroup {}
864
/// Miscellaneous allocation recycling pool for `CommandAllocator`.
#[derive(Default)]
struct Temp {
    /// Reusable byte buffer for building NUL-terminated marker strings.
    marker: Vec<u8>,
    /// Reusable scratch storage for buffer memory barriers.
    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
    /// Reusable scratch storage for image memory barriers.
    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
}
872
873impl Temp {
874    fn clear(&mut self) {
875        self.marker.clear();
876        self.buffer_barriers.clear();
877        self.image_barriers.clear();
878    }
879
880    fn make_c_str(&mut self, name: &str) -> &CStr {
881        self.marker.clear();
882        self.marker.extend_from_slice(name.as_bytes());
883        self.marker.push(0);
884        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
885    }
886}
887
/// Vulkan implementation of [`crate::CommandEncoder`].
///
/// Owns a `VkCommandPool` and recycles the command buffers allocated from it.
pub struct CommandEncoder {
    /// The command pool that `active`, `free`, and `discarded` are all
    /// allocated from.
    raw: vk::CommandPool,
    /// The device that owns `raw`.
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    /// Shared HAL counters; `command_encoders` is decremented in `Drop`.
    counters: Arc<wgt::HalCounters>,
}
926
impl Drop for CommandEncoder {
    fn drop(&mut self) {
        // SAFETY:
        //
        // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
        // `CommandBuffer` must live until its execution is complete, and that a
        // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
        // Thus, we know that none of our `CommandBuffers` are in the "pending"
        // state.
        //
        // The other VUIDs are pretty obvious.
        unsafe {
            // `vkDestroyCommandPool` also frees any command buffers allocated
            // from that pool, so there's no need to explicitly call
            // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
            // fields.
            self.device.raw.destroy_command_pool(self.raw, None);
        }
        // Keep the public HAL counter in sync with the number of live encoders.
        self.counters.command_encoders.sub(1);
    }
}
948
impl CommandEncoder {
    /// Get the raw command buffer currently being recorded.
    ///
    /// This is null when `self` is not in the "recording" state.
    ///
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}
957
958impl fmt::Debug for CommandEncoder {
959    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
960        f.debug_struct("CommandEncoder")
961            .field("raw", &self.raw)
962            .finish()
963    }
964}
965
/// Vulkan implementation of [`crate::Api::CommandBuffer`].
///
/// Must not outlive the [`CommandEncoder`] that built it; see the safety
/// discussion on `CommandEncoder`'s `Drop` impl.
#[derive(Debug)]
pub struct CommandBuffer {
    raw: vk::CommandBuffer,
}

impl crate::DynCommandBuffer for CommandBuffer {}
972
/// Vulkan implementation of [`crate::Api::ShaderModule`].
#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum ShaderModule {
    /// A shader already lowered to SPIR-V and registered with Vulkan.
    Raw(vk::ShaderModule),
    /// Naga IR retained for later translation.
    /// NOTE(review): presumably lowered to SPIR-V once pipeline-specific
    /// information is available — confirm in `device.rs`.
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

impl crate::DynShaderModule for ShaderModule {}
984
/// Vulkan implementation of [`crate::Api::RenderPipeline`].
#[derive(Debug)]
pub struct RenderPipeline {
    raw: vk::Pipeline,
}

impl crate::DynRenderPipeline for RenderPipeline {}
991
/// Vulkan implementation of [`crate::Api::ComputePipeline`].
#[derive(Debug)]
pub struct ComputePipeline {
    raw: vk::Pipeline,
}

impl crate::DynComputePipeline for ComputePipeline {}
998
/// Vulkan implementation of [`crate::Api::PipelineCache`].
#[derive(Debug)]
pub struct PipelineCache {
    raw: vk::PipelineCache,
}

impl crate::DynPipelineCache for PipelineCache {}
1005
/// Vulkan implementation of [`crate::Api::QuerySet`].
#[derive(Debug)]
pub struct QuerySet {
    raw: vk::QueryPool,
}

impl crate::DynQuerySet for QuerySet {}
1012
/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool {
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        free: Vec<vk::Fence>,
    },
}

impl crate::DynFence for Fence {}
1063
impl Fence {
    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
    ///
    /// As an optimization, assume that we already know that the fence has
    /// reached `last_completed`, and don't bother checking fences whose values
    /// are less than that: those fences remain in the `active` array only
    /// because we haven't called `maintain` yet to clean them up.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn check_active(
        device: &ash::Device,
        mut last_completed: crate::FenceValue,
        active: &[(crate::FenceValue, vk::Fence)],
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        for &(value, raw) in active.iter() {
            unsafe {
                // Only poll fences that could raise the known completed value.
                if value > last_completed
                    && device
                        .get_fence_status(raw)
                        .map_err(map_host_device_oom_and_lost_err)?
                {
                    last_completed = value;
                }
            }
        }
        Ok(last_completed)
    }

    /// Return the highest signalled [`FenceValue`] for `self`.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn get_latest(
        &self,
        device: &ash::Device,
        extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(raw) => unsafe {
                Ok(match *extension.unwrap() {
                    // Functions loaded from `VK_KHR_timeline_semaphore`.
                    ExtensionFn::Extension(ref ext) => ext
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                    // Promoted to core; use the device's own entry point.
                    ExtensionFn::Promoted => device
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                })
            },
            Self::FencePool {
                last_completed,
                ref active,
                free: _,
            } => Self::check_active(device, last_completed, active),
        }
    }

    /// Trim the internal state of this [`Fence`].
    ///
    /// This function has no externally visible effect, but you should call it
    /// periodically to keep this fence's resource consumption under control.
    ///
    /// For fences using the [`FencePool`] implementation, this function
    /// recycles fences that have been signaled. If you don't call this,
    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
    /// time it's called.
    ///
    /// [`FencePool`]: Fence::FencePool
    /// [`Queue::submit`]: crate::Queue::submit
    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(_) => {}
            Self::FencePool {
                ref mut last_completed,
                ref mut active,
                ref mut free,
            } => {
                let latest = Self::check_active(device, *last_completed, active)?;
                // Move every fence that has signalled into `free`...
                let base_free = free.len();
                for &(value, raw) in active.iter() {
                    if value <= latest {
                        free.push(raw);
                    }
                }
                if free.len() != base_free {
                    // ...drop them from `active`, and reset only the
                    // newly-recycled fences (the `base_free..` slice) so they
                    // can be reused by future submissions.
                    active.retain(|&(value, _)| value > latest);
                    unsafe { device.reset_fences(&free[base_free..]) }
                        .map_err(map_device_oom_err)?
                }
                *last_completed = latest;
            }
        }
        Ok(())
    }
}
1157
impl crate::Queue for Queue {
    type A = Api;

    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        // Raw fence handed to `vkQueueSubmit`; stays null unless
        // `signal_fence` is a `Fence::FencePool`.
        let mut fence_raw = vk::Fence::null();

        // `wait_stage_masks` must stay the same length as `wait_semaphores`,
        // and `signal_values` the same length as `signal_semaphores`.
        let mut wait_stage_masks = Vec::new();
        let mut wait_semaphores = Vec::new();
        let mut signal_semaphores = Vec::new();
        let mut signal_values = Vec::new();

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Arcs by pointer, as Eq isn't well defined for SurfaceSemaphores.
                for st in surface_textures {
                    check.insert(Arc::as_ptr(&st.surface_semaphores));
                }
                check.len() == surface_textures.len()
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| {
                st.surface_semaphores
                    .try_lock()
                    .expect("Failed to lock surface semaphore.")
            })
            .collect::<Vec<_>>();

        for mut swapchain_semaphore in locked_swapchain_semaphores {
            swapchain_semaphore.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = swapchain_semaphore.get_acquire_wait_semaphore() {
                wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
                wait_semaphores.push(sem);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = swapchain_semaphore.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push(signal_semaphore);
            // Binary semaphores ignore their entry in the timeline-value
            // array, but the two arrays must have matching lengths.
            signal_values.push(!0);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
            wait_semaphores.push(sem);
        }

        signal_semaphores.push(semaphore_state.signal);
        signal_values.push(!0);

        // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push(raw);
                signal_values.push(signal_value);
            }
            Fence::FencePool {
                ref mut active,
                ref mut free,
                ..
            } => {
                // Reuse a recycled fence if one is available; otherwise
                // create a fresh one.
                fence_raw = match free.pop() {
                    Some(raw) => raw,
                    None => unsafe {
                        self.device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?
                    },
                };
                active.push((signal_value, fence_raw));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);

        vk_info = vk_info
            .wait_semaphores(&wait_semaphores)
            .wait_dst_stage_mask(&wait_stage_masks)
            .signal_semaphores(&signal_semaphores);

        let mut vk_timeline_info;

        if self.device.private_caps.timeline_semaphores {
            // Attach the timeline values collected above for the signal
            // semaphores; entry count matches `signal_semaphores`.
            vk_timeline_info =
                vk::TimelineSemaphoreSubmitInfo::default().signal_semaphore_values(&signal_values);
            vk_info = vk_info.push_next(&mut vk_timeline_info);
        }

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();
        let ssc = swapchain.as_mut().unwrap();
        let mut swapchain_semaphores = texture.surface_semaphores.lock();

        let swapchains = [ssc.raw];
        let image_indices = [texture.index];
        let vk_info = vk::PresentInfoKHR::default()
            .swapchains(&swapchains)
            .image_indices(&image_indices)
            .wait_semaphores(swapchain_semaphores.get_present_wait_semaphores());

        // Optional `VK_GOOGLE_display_timing` payload; both locals must
        // outlive `vk_info`, hence the declarations out here.
        let mut display_timing;
        let present_times;
        let vk_info = if let Some(present_time) = ssc.next_present_time.take() {
            debug_assert!(
                ssc.device
                    .features
                    .contains(wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING),
                "`next_present_time` should only be set if `VULKAN_GOOGLE_DISPLAY_TIMING` is enabled"
            );
            present_times = [present_time];
            display_timing = vk::PresentTimesInfoGOOGLE::default().times(&present_times);
            // SAFETY: We know that VK_GOOGLE_display_timing is present because of the safety contract on `next_present_time`.
            vk_info.push_next(&mut display_timing)
        } else {
            vk_info
        };

        let suboptimal = {
            profiling::scope!("vkQueuePresentKHR");
            unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| {
                match error {
                    vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
                    vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
                    // We don't use VK_EXT_full_screen_exclusive
                    // VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT
                    _ => map_host_device_oom_and_lost_err(error).into(),
                }
            })?
        };
        if suboptimal {
            // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
            // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns `VK_SUBOPTIMAL_KHR` if not doing pre-rotation
            // (i.e `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current device orientation).
            // This is always the case when the device orientation is anything other than the identity one, as we unconditionally use `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`.
            #[cfg(not(target_os = "android"))]
            log::warn!("Suboptimal present of frame {}", texture.index);
        }
        Ok(())
    }

    unsafe fn get_timestamp_period(&self) -> f32 {
        // The period is stored on `DeviceShared`; no Vulkan call needed here.
        self.device.timestamp_period
    }
}
1342
1343/// Maps
1344///
1345/// - VK_ERROR_OUT_OF_HOST_MEMORY
1346/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1347fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
1348    match err {
1349        vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
1350            get_oom_err(err)
1351        }
1352        e => get_unexpected_err(e),
1353    }
1354}
1355
1356/// Maps
1357///
1358/// - VK_ERROR_OUT_OF_HOST_MEMORY
1359/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1360/// - VK_ERROR_DEVICE_LOST
1361fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
1362    match err {
1363        vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
1364        other => map_host_device_oom_err(other),
1365    }
1366}
1367
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    // so the IOCA case cannot occur and plain OOM mapping suffices.
    map_host_device_oom_err(err)
}
1378
1379/// Maps
1380///
1381/// - VK_ERROR_OUT_OF_HOST_MEMORY
1382fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
1383    match err {
1384        vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
1385        e => get_unexpected_err(e),
1386    }
1387}
1388
1389/// Maps
1390///
1391/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1392fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
1393    match err {
1394        vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
1395        e => get_unexpected_err(e),
1396    }
1397}
1398
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    // so the IOCA case cannot occur and plain host-OOM mapping suffices.
    map_host_oom_err(err)
}
1408
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control
    // VK_PIPELINE_COMPILE_REQUIRED_EXT
    // We don't use VK_NV_glsl_shader
    // VK_ERROR_INVALID_SHADER_NV
    // so only the OOM cases remain to be mapped.
    map_host_device_oom_err(err)
}
1422
/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
/// feature flag is enabled.
fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
    #[cfg(feature = "internal_error_panic")]
    panic!("Unexpected Vulkan error: {_err:?}");

    // With the feature enabled, the `panic!` above makes this unreachable.
    #[allow(unreachable_code)]
    crate::DeviceError::Unexpected
}
1432
/// Returns [`crate::DeviceError::OutOfMemory`] or panics if the `oom_panic`
/// feature flag is enabled.
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    #[cfg(feature = "oom_panic")]
    panic!("Out of memory ({_err:?})");

    // With the feature enabled, the `panic!` above makes this unreachable.
    #[allow(unreachable_code)]
    crate::DeviceError::OutOfMemory
}
1442
/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
/// feature flag is enabled.
fn get_lost_err() -> crate::DeviceError {
    #[cfg(feature = "device_lost_panic")]
    panic!("Device lost");

    // With the feature enabled, the `panic!` above makes this unreachable.
    #[allow(unreachable_code)]
    crate::DeviceError::Lost
}
1452
/// Host-side record for a top-level acceleration structure instance.
///
/// NOTE(review): the `repr(C)` layout (3x4 row-major transform, packed
/// index/mask and SBT-offset/flags words, 64-bit acceleration structure
/// reference) appears intended to match `VkAccelerationStructureInstanceKHR` —
/// confirm against the Vulkan spec before relying on byte-for-byte copies.
#[derive(Clone)]
#[repr(C)]
struct RawTlasInstance {
    transform: [f32; 12],
    custom_index_and_mask: u32,
    shader_binding_table_record_offset_and_flags: u32,
    acceleration_structure_reference: u64,
}