// azul_webrender/renderer/gpu_cache.rs

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

use std::{cmp, mem};
use api::units::*;
use malloc_size_of::MallocSizeOfOps;
use crate::{
    device::{CustomVAO, Device, DrawTarget, Program, ReadTarget, Texture, TextureFilter, UploadPBOPool, VBO},
    gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList},
    internal_types::{RenderTargetInfo, Swizzle},
    prim_store::DeferredResolve,
    profiler,
    render_api::MemoryReport,
    render_backend::FrameId,
};

/// Enabling this toggle forces the GPU cache texture to be resized every
/// frame, which lets GPU debuggers verify that the resize path is handled
/// correctly.
const GPU_CACHE_RESIZE_TEST: bool = false;

/// Tracks the state of each row in the GPU cache texture.
struct CacheRow {
    /// Mirrored block data on CPU for this row. We store a copy of
    /// the data on the CPU side to improve upload batching.
    cpu_blocks: Box<[GpuBlockData; super::MAX_VERTEX_TEXTURE_WIDTH]>,
    /// The first offset in this row that is dirty.
    min_dirty: u16,
    /// One past the last offset in this row that is dirty (exclusive bound).
    max_dirty: u16,
}

impl CacheRow {
    fn new() -> Self {
        CacheRow {
            cpu_blocks: Box::new([GpuBlockData::EMPTY; super::MAX_VERTEX_TEXTURE_WIDTH]),
            min_dirty: super::MAX_VERTEX_TEXTURE_WIDTH as _,
            max_dirty: 0,
        }
    }

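    /// A row is clean when its dirty range is empty, i.e. `min_dirty >= max_dirty`.
    /// `new` and `clear_dirty` establish this by setting `min_dirty` to the row
    /// width and `max_dirty` to 0, so the first `add_dirty` call shrinks the
    /// range onto exactly the touched blocks.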
    fn is_dirty(&self) -> bool {
        return self.min_dirty < self.max_dirty;
    }

    fn clear_dirty(&mut self) {
        self.min_dirty = super::MAX_VERTEX_TEXTURE_WIDTH as _;
        self.max_dirty = 0;
    }

    fn add_dirty(&mut self, block_offset: usize, block_count: usize) {
        self.min_dirty = self.min_dirty.min(block_offset as _);
        self.max_dirty = self.max_dirty.max((block_offset + block_count) as _);
    }

    fn dirty_blocks(&self) -> &[GpuBlockData] {
        return &self.cpu_blocks[self.min_dirty as usize .. self.max_dirty as usize];
    }
}

/// The bus over which CPU and GPU versions of the GPU cache
/// get synchronized.
enum GpuCacheBus {
    /// PBO-based updates, which currently operate at row granularity and are
    /// therefore subject to fragmentation issues.
    PixelBuffer {
        /// Per-row data.
        rows: Vec<CacheRow>,
    },
    /// Shader-based scattering updates, currently implemented by rendering a
    /// set of points into the GPU texture, each carrying a `GpuBlockData`.
    Scatter {
        /// Special program to run the scattered update.
        program: Program,
        /// VAO containing the source vertex buffers.
        vao: CustomVAO,
        /// VBO for positional data, supplied as normalized `u16`.
        buf_position: VBO<[u16; 2]>,
        /// VBO for GPU block data.
        buf_value: VBO<GpuBlockData>,
        /// Currently stored block count.
        count: usize,
    },
}

/// The device-specific representation of the cache texture in gpu_cache.rs
pub struct GpuCacheTexture {
    texture: Option<Texture>,
    bus: GpuCacheBus,
}

impl GpuCacheTexture {
    /// Ensures that we have an appropriately-sized texture.
    fn ensure_texture(&mut self, device: &mut Device, height: i32) {
        // If we already have a texture that works, we're done.
        if self.texture.as_ref().map_or(false, |t| t.get_dimensions().height >= height) {
            if GPU_CACHE_RESIZE_TEST {
                // Special debug mode - resize the texture even though it's fine.
            } else {
                return;
            }
        }

        // Take the old texture, if any.
        let blit_source = self.texture.take();

        // Create the new texture.
        assert!(height >= 2, "Height is too small for ANGLE");
        let new_size = DeviceIntSize::new(super::MAX_VERTEX_TEXTURE_WIDTH as _, height);
        // GpuCacheBus::Scatter always requires the texture to be a render target. For
        // GpuCacheBus::PixelBuffer, we only create the texture with a render target if
        // RGBAF32 render targets are actually supported, and only if glCopyImageSubData
        // is not. glCopyImageSubData does not require a render target to copy the texture
        // data, and if neither RGBAF32 render targets nor glCopyImageSubData is supported,
        // we simply re-upload the entire contents rather than copying upon resize.
        let supports_copy_image_sub_data = device.get_capabilities().supports_copy_image_sub_data;
        let supports_color_buffer_float = device.get_capabilities().supports_color_buffer_float;
        let rt_info = if matches!(self.bus, GpuCacheBus::PixelBuffer { .. })
            && (supports_copy_image_sub_data || !supports_color_buffer_float)
        {
            None
        } else {
            Some(RenderTargetInfo { has_depth: false })
        };
        let mut texture = device.create_texture(
            api::ImageBufferKind::Texture2D,
            api::ImageFormat::RGBAF32,
            new_size.width,
            new_size.height,
            TextureFilter::Nearest,
            rt_info,
        );

        // Copy the contents of the previous texture, if applicable.
        if let Some(blit_source) = blit_source {
            if !supports_copy_image_sub_data && !supports_color_buffer_float {
                // Cannot copy texture, so must re-upload everything.
                match self.bus {
                    GpuCacheBus::PixelBuffer { ref mut rows } => {
                        for row in rows {
                            row.add_dirty(0, super::MAX_VERTEX_TEXTURE_WIDTH);
                        }
                    }
                    GpuCacheBus::Scatter { .. } => {
                        panic!("Texture must be copyable to use scatter GPU cache bus method");
                    }
                }
            } else {
                device.copy_entire_texture(&mut texture, &blit_source);
            }
            device.delete_texture(blit_source);
        }

        self.texture = Some(texture);
    }

    pub fn new(device: &mut Device, use_scatter: bool) -> Result<Self, super::RendererError> {
        use super::desc::GPU_CACHE_UPDATE;

        let bus = if use_scatter {
            assert!(
                device.get_capabilities().supports_color_buffer_float,
                "GpuCache scatter method requires EXT_color_buffer_float",
            );
            let program = device.create_program_linked(
                "gpu_cache_update",
                &[],
                &GPU_CACHE_UPDATE,
            )?;
            let buf_position = device.create_vbo();
            let buf_value = device.create_vbo();
            // Note: the vertex attributes have to be supplied in the same order
            // as for program creation, but each assigned to a different stream.
            let vao = device.create_custom_vao(&[
                buf_position.stream_with(&GPU_CACHE_UPDATE.vertex_attributes[0..1]),
                buf_value   .stream_with(&GPU_CACHE_UPDATE.vertex_attributes[1..2]),
            ]);
            GpuCacheBus::Scatter {
                program,
                vao,
                buf_position,
                buf_value,
                count: 0,
            }
        } else {
            GpuCacheBus::PixelBuffer {
                rows: Vec::new(),
            }
        };

        Ok(GpuCacheTexture {
            texture: None,
            bus,
        })
    }

    pub fn deinit(mut self, device: &mut Device) {
        if let Some(t) = self.texture.take() {
            device.delete_texture(t);
        }
        if let GpuCacheBus::Scatter { program, vao, buf_position, buf_value, .. } = self.bus {
            device.delete_program(program);
            device.delete_custom_vao(vao);
            device.delete_vbo(buf_position);
            device.delete_vbo(buf_value);
        }
    }

    pub fn get_height(&self) -> i32 {
        self.texture.as_ref().map_or(0, |t| t.get_dimensions().height)
    }

    #[cfg(feature = "capture")]
    pub fn get_texture(&self) -> &Texture {
        self.texture.as_ref().unwrap()
    }

    fn prepare_for_updates(
        &mut self,
        device: &mut Device,
        total_block_count: usize,
        max_height: i32,
    ) {
        self.ensure_texture(device, max_height);
        match self.bus {
            GpuCacheBus::PixelBuffer { .. } => {},
            GpuCacheBus::Scatter {
                ref mut buf_position,
                ref mut buf_value,
                ref mut count,
                ..
            } => {
                *count = 0;
                if total_block_count > buf_value.allocated_count() {
                    device.allocate_vbo(buf_position, total_block_count, super::ONE_TIME_USAGE_HINT);
                    device.allocate_vbo(buf_value,    total_block_count, super::ONE_TIME_USAGE_HINT);
                }
            }
        }
    }

    pub fn invalidate(&mut self) {
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                info!("Invalidating GPU caches");
                for row in rows {
                    row.add_dirty(0, super::MAX_VERTEX_TEXTURE_WIDTH);
                }
            }
            GpuCacheBus::Scatter { .. } => {
                warn!("Unable to invalidate scattered GPU cache");
            }
        }
    }

    fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) {
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                for update in &updates.updates {
                    match *update {
                        GpuCacheUpdate::Copy {
                            block_index,
                            block_count,
                            address,
                        } => {
                            let row = address.v as usize;

                            // Ensure that the CPU-side shadow copy of the GPU cache data has enough
                            // rows to apply this patch.
                            while rows.len() <= row {
                                // Add a new row.
                                rows.push(CacheRow::new());
                            }

                            // Copy the blocks from the patch array into the shadow CPU copy.
                            let block_offset = address.u as usize;
                            let data = &mut rows[row].cpu_blocks;
                            for i in 0 .. block_count {
                                data[block_offset + i] = updates.blocks[block_index + i];
                            }

                            // This row is dirty (needs to be updated in the GPU texture).
                            rows[row].add_dirty(block_offset, block_count);
                        }
                    }
                }
            }
            GpuCacheBus::Scatter {
                ref buf_position,
                ref buf_value,
                ref mut count,
                ..
            } => {
                // TODO: re-use this heap allocation.
                // Unused positions will be left as 0xFFFF, which translates to
                // (1.0, 1.0) in the vertex output position and gets culled out.
                let mut position_data = vec![[!0u16; 2]; updates.blocks.len()];
                let size = self.texture.as_ref().unwrap().get_dimensions().to_usize();

                for update in &updates.updates {
                    match *update {
                        GpuCacheUpdate::Copy {
                            block_index,
                            block_count,
                            address,
                        } => {
                            // Convert the absolute texel position into normalized coordinates.
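                            // The position VBO holds normalized `u16` coordinates, so the
                            // texel center (v + 0.5) is mapped into 16-bit fixed point:
                            // ((2*v + 1) << 15) / height == (v + 0.5) * 65536 / height.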
                            let y = ((2*address.v as usize + 1) << 15) / size.height;
                            for i in 0 .. block_count {
                                let x = ((2*address.u as usize + 2*i + 1) << 15) / size.width;
                                position_data[block_index + i] = [x as _, y as _];
                            }
                        }
                    }
                }

                device.fill_vbo(buf_value, &updates.blocks, *count);
                device.fill_vbo(buf_position, &position_data, *count);
                *count += position_data.len();
            }
        }
    }

    fn flush(&mut self, device: &mut Device, pbo_pool: &mut UploadPBOPool) -> usize {
        let texture = self.texture.as_ref().unwrap();
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows } => {
                let rows_dirty = rows
                    .iter()
                    .filter(|row| row.is_dirty())
                    .count();
                if rows_dirty == 0 {
                    return 0
                }

                let mut uploader = device.upload_texture(pbo_pool);

                for (row_index, row) in rows.iter_mut().enumerate() {
                    if !row.is_dirty() {
                        continue;
                    }

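                    // Upload only the dirty span of this row: a rect one texel tall,
                    // starting at `min_dirty` and covering the dirty block count.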
                    let blocks = row.dirty_blocks();
                    let rect = DeviceIntRect::from_origin_and_size(
                        DeviceIntPoint::new(row.min_dirty as i32, row_index as i32),
                        DeviceIntSize::new(blocks.len() as i32, 1),
                    );

                    uploader.upload(device, texture, rect, None, None, blocks.as_ptr(), blocks.len());

                    row.clear_dirty();
                }

                uploader.flush(device);

                rows_dirty
            }
            GpuCacheBus::Scatter { ref program, ref vao, count, .. } => {
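                // Scatter the pending blocks by drawing one point per block into the
                // cache texture, with blending and depth testing disabled.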
                device.disable_depth();
                device.set_blend(false);
                device.bind_program(program);
                device.bind_custom_vao(vao);
                device.bind_draw_target(
                    DrawTarget::from_texture(
                        texture,
                        false,
                    ),
                );
                device.draw_nonindexed_points(0, count as _);
                0
            }
        }
    }

    #[cfg(feature = "replay")]
    pub fn remove_texture(&mut self, device: &mut Device) {
        if let Some(t) = self.texture.take() {
            device.delete_texture(t);
        }
    }

    #[cfg(feature = "replay")]
    pub fn load_from_data(&mut self, texture: Texture, data: Vec<u8>) {
        assert!(self.texture.is_none());
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                let dim = texture.get_dimensions();
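                // Reinterpret the raw bytes as `GpuBlockData`; the loaded texture data
                // is assumed to be tightly packed RGBAF32 blocks (16 bytes each).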
                let blocks = unsafe {
                    std::slice::from_raw_parts(
                        data.as_ptr() as *const GpuBlockData,
                        data.len() / mem::size_of::<GpuBlockData>(),
                    )
                };
                // Fill up the CPU cache from the contents we just loaded.
                rows.clear();
                rows.extend((0 .. dim.height).map(|_| CacheRow::new()));
                let chunks = blocks.chunks(super::MAX_VERTEX_TEXTURE_WIDTH);
                debug_assert_eq!(chunks.len(), rows.len());
                for (row, chunk) in rows.iter_mut().zip(chunks) {
                    row.cpu_blocks.copy_from_slice(chunk);
                }
            }
            GpuCacheBus::Scatter { .. } => {}
        }
        self.texture = Some(texture);
    }

    pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) {
        if let GpuCacheBus::PixelBuffer { ref rows, .. } = self.bus {
            for row in rows.iter() {
                report.gpu_cache_cpu_mirror += unsafe { (size_op_funs.size_of_op)(row.cpu_blocks.as_ptr() as *const _) };
            }
        }

        // GPU cache GPU memory.
        report.gpu_cache_textures +=
            self.texture.as_ref().map_or(0, |t| t.size_in_bytes());
    }
}

impl super::Renderer {
    pub fn update_gpu_cache(&mut self) {
        let _gm = self.gpu_profiler.start_marker("gpu cache update");

        // For an artificial stress test of GPU cache resizing,
        // always pass an extra update list with at least one block in it.
        let gpu_cache_height = self.gpu_cache_texture.get_height();
        if gpu_cache_height != 0 && GPU_CACHE_RESIZE_TEST {
            self.pending_gpu_cache_updates.push(GpuCacheUpdateList {
                frame_id: FrameId::INVALID,
                clear: false,
                height: gpu_cache_height,
                blocks: vec![[1f32; 4].into()],
                updates: Vec::new(),
                debug_commands: Vec::new(),
            });
        }

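        // Accumulate the total number of blocks to upload and the tallest
        // requested cache height across all pending update lists.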
        let (updated_blocks, max_requested_height) = self
            .pending_gpu_cache_updates
            .iter()
            .fold((0, gpu_cache_height), |(count, height), list| {
                (count + list.blocks.len(), cmp::max(height, list.height))
            });

        if max_requested_height > self.get_max_texture_size() && !self.gpu_cache_overflow {
            self.gpu_cache_overflow = true;
            self.renderer_errors.push(super::RendererError::MaxTextureSize);
        }

        // Note: if we decide to switch to scatter-style GPU cache updates
        // permanently, this code could be made nicer with a `BufferUploader`-style
        // helper, similar to how the `TextureUploader` API is used.
        self.gpu_cache_texture.prepare_for_updates(
            &mut self.device,
            updated_blocks,
            max_requested_height,
        );

        for update_list in self.pending_gpu_cache_updates.drain(..) {
            assert!(update_list.height <= max_requested_height);
            if update_list.frame_id > self.gpu_cache_frame_id {
                self.gpu_cache_frame_id = update_list.frame_id;
            }
            self.gpu_cache_texture
                .update(&mut self.device, &update_list);
        }

        self.profile.start_time(profiler::GPU_CACHE_UPLOAD_TIME);
        let updated_rows = self.gpu_cache_texture.flush(
            &mut self.device,
            &mut self.texture_upload_pbo_pool,
        );
        self.gpu_cache_upload_time += self.profile.end_time(profiler::GPU_CACHE_UPLOAD_TIME);

        self.profile.set(profiler::GPU_CACHE_ROWS_UPDATED, updated_rows);
        self.profile.set(profiler::GPU_CACHE_BLOCKS_UPDATED, updated_blocks);
    }

    pub fn prepare_gpu_cache(
        &mut self,
        deferred_resolves: &[DeferredResolve],
    ) -> Result<(), super::RendererError> {
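        // If a full cache clear is pending, rebuild the cache texture from scratch
        // (keeping the same bus kind) before applying this frame's updates.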
        if self.pending_gpu_cache_clear {
            let use_scatter =
                matches!(self.gpu_cache_texture.bus, GpuCacheBus::Scatter { .. });
            let new_cache = GpuCacheTexture::new(&mut self.device, use_scatter)?;
            let old_cache = mem::replace(&mut self.gpu_cache_texture, new_cache);
            old_cache.deinit(&mut self.device);
            self.pending_gpu_cache_clear = false;
        }

        let deferred_update_list = self.update_deferred_resolves(deferred_resolves);
        self.pending_gpu_cache_updates.extend(deferred_update_list);

        self.update_gpu_cache();

        // Note: the texture might have changed during the `update`,
        // so we need to bind it here.
        self.device.bind_texture(
            super::TextureSampler::GpuCache,
            self.gpu_cache_texture.texture.as_ref().unwrap(),
            Swizzle::default(),
        );

        Ok(())
    }

    pub fn read_gpu_cache(&mut self) -> (DeviceIntSize, Vec<u8>) {
        let texture = self.gpu_cache_texture.texture.as_ref().unwrap();
        let size = device_size_as_framebuffer_size(texture.get_dimensions());
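        // RGBAF32 is 4 channels x 4 bytes each, i.e. 16 bytes per texel.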
        let mut texels = vec![0; (size.width * size.height * 16) as usize];
        self.device.begin_frame();
        self.device.bind_read_target(ReadTarget::from_texture(texture));
        self.device.read_pixels_into(
            size.into(),
            api::ImageFormat::RGBAF32,
            &mut texels,
        );
        self.device.reset_read_target();
        self.device.end_frame();
        (texture.get_dimensions(), texels)
    }
}