use std::{cmp, mem};
use api::units::*;
use malloc_size_of::MallocSizeOfOps;
use crate::{
    device::{CustomVAO, Device, DrawTarget, Program, ReadTarget, Texture, TextureFilter, UploadPBOPool, VBO},
    gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList},
    internal_types::{RenderTargetInfo, Swizzle},
    prim_store::DeferredResolve,
    profiler,
    render_api::MemoryReport,
    render_backend::FrameId,
};

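/// When true, the GPU cache texture is reallocated every frame and an
/// extra dummy update list is pushed, to stress the resize/copy path.
/// Testing only.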
const GPU_CACHE_RESIZE_TEST: bool = false;

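/// The CPU-side mirror of one row of the GPU cache texture, together
/// with the span of blocks within it that must be re-uploaded.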
struct CacheRow {
    /// The CPU-side copy of this row's blocks in the cache texture.
    cpu_blocks: Box<[GpuBlockData; super::MAX_VERTEX_TEXTURE_WIDTH]>,
    /// The first dirty block in the row (inclusive).
    min_dirty: u16,
    /// One past the last dirty block in the row (exclusive).
    max_dirty: u16,
}

impl CacheRow {
    fn new() -> Self {
        CacheRow {
            cpu_blocks: Box::new([GpuBlockData::EMPTY; super::MAX_VERTEX_TEXTURE_WIDTH]),
            min_dirty: super::MAX_VERTEX_TEXTURE_WIDTH as _,
            max_dirty: 0,
        }
    }

    fn is_dirty(&self) -> bool {
        self.min_dirty < self.max_dirty
    }

    fn clear_dirty(&mut self) {
        self.min_dirty = super::MAX_VERTEX_TEXTURE_WIDTH as _;
        self.max_dirty = 0;
    }

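    /// Extends the dirty span to cover `block_count` blocks starting at
    /// `block_offset`. The span is kept as a single conservative range,
    /// so e.g. `add_dirty(10, 4)` followed by `add_dirty(2, 3)` leaves
    /// blocks `2 .. 14` dirty, including the clean blocks in between.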
    fn add_dirty(&mut self, block_offset: usize, block_count: usize) {
        self.min_dirty = self.min_dirty.min(block_offset as _);
        self.max_dirty = self.max_dirty.max((block_offset + block_count) as _);
    }

    fn dirty_blocks(&self) -> &[GpuBlockData] {
        &self.cpu_blocks[self.min_dirty as usize .. self.max_dirty as usize]
    }
}

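/// The transport used to move GPU cache data from the CPU to the cache
/// texture: either row uploads through a pixel buffer, or a scattered
/// write performed by a GPU draw.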
enum GpuCacheBus {
    /// Persistent CPU mirror of the texture, uploaded row by row.
    PixelBuffer {
        /// Currently stored block data, one `CacheRow` per texture row.
        rows: Vec<CacheRow>,
    },
    /// Update the texture by drawing one point per block into it.
    Scatter {
        /// Program that performs the scattered writes.
        program: Program,
        /// VAO binding the two source vertex buffers.
        vao: CustomVAO,
        /// VBO holding the target position of each block.
        buf_position: VBO<[u16; 2]>,
        /// VBO holding the block values to write.
        buf_value: VBO<GpuBlockData>,
        /// Number of blocks currently staged for the next draw.
        count: usize,
    },
}

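/// The GPU cache texture together with the bus used to keep it updated.
/// The texture itself is allocated lazily, once the required height is
/// known.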
pub struct GpuCacheTexture {
    texture: Option<Texture>,
    bus: GpuCacheBus,
}

impl GpuCacheTexture {
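    /// Ensures the texture exists and is at least `height` rows tall,
    /// carrying the old contents over into any new allocation.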
    fn ensure_texture(&mut self, device: &mut Device, height: i32) {
        // If the existing texture is already tall enough there is nothing
        // to do, unless the resize test is forcing a reallocation.
        if self.texture.as_ref().map_or(false, |t| t.get_dimensions().height >= height)
            && !GPU_CACHE_RESIZE_TEST
        {
            return;
        }

        // Take ownership of the old texture, if any, so that its contents
        // can be carried over into the new allocation below.
        let blit_source = self.texture.take();

        assert!(height >= 2, "Height is too small for ANGLE");
        let new_size = DeviceIntSize::new(super::MAX_VERTEX_TEXTURE_WIDTH as _, height);
        let supports_copy_image_sub_data = device.get_capabilities().supports_copy_image_sub_data;
        let supports_color_buffer_float = device.get_capabilities().supports_color_buffer_float;
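        // The texture only needs to be a render target when it will be
        // written to on the GPU: always for the scatter bus, and for the
        // pixel-buffer bus only when preserving the old contents has to
        // go through a blit (no copy_image_sub_data, but float render
        // targets are available).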
        let rt_info = if matches!(self.bus, GpuCacheBus::PixelBuffer { .. })
            && (supports_copy_image_sub_data || !supports_color_buffer_float)
        {
            None
        } else {
            Some(RenderTargetInfo { has_depth: false })
        };
        let mut texture = device.create_texture(
            api::ImageBufferKind::Texture2D,
            api::ImageFormat::RGBAF32,
            new_size.width,
            new_size.height,
            TextureFilter::Nearest,
            rt_info,
        );

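        // Preserve the contents of the previous texture, either with a
        // GPU-side copy or, when the device cannot copy between these
        // textures at all, by marking the whole CPU mirror dirty so that
        // it is re-uploaded on the next flush.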
        if let Some(blit_source) = blit_source {
            if !supports_copy_image_sub_data && !supports_color_buffer_float {
                match self.bus {
                    GpuCacheBus::PixelBuffer { ref mut rows } => {
                        for row in rows {
                            row.add_dirty(0, super::MAX_VERTEX_TEXTURE_WIDTH);
                        }
                    }
                    GpuCacheBus::Scatter { .. } => {
                        panic!("Texture must be copyable to use scatter GPU cache bus method");
                    }
                }
            } else {
                device.copy_entire_texture(&mut texture, &blit_source);
            }
            device.delete_texture(blit_source);
        }

        self.texture = Some(texture);
    }

    pub fn new(device: &mut Device, use_scatter: bool) -> Result<Self, super::RendererError> {
        use super::desc::GPU_CACHE_UPDATE;

        let bus = if use_scatter {
            assert!(
                device.get_capabilities().supports_color_buffer_float,
                "GpuCache scatter method requires EXT_color_buffer_float",
            );
            let program = device.create_program_linked(
                "gpu_cache_update",
                &[],
                &GPU_CACHE_UPDATE,
            )?;
            let buf_position = device.create_vbo();
            let buf_value = device.create_vbo();
            let vao = device.create_custom_vao(&[
                buf_position.stream_with(&GPU_CACHE_UPDATE.vertex_attributes[0..1]),
                buf_value   .stream_with(&GPU_CACHE_UPDATE.vertex_attributes[1..2]),
            ]);
            GpuCacheBus::Scatter {
                program,
                vao,
                buf_position,
                buf_value,
                count: 0,
            }
        } else {
            GpuCacheBus::PixelBuffer {
                rows: Vec::new(),
            }
        };

        Ok(GpuCacheTexture {
            texture: None,
            bus,
        })
    }

    pub fn deinit(mut self, device: &mut Device) {
        if let Some(t) = self.texture.take() {
            device.delete_texture(t);
        }
        if let GpuCacheBus::Scatter { program, vao, buf_position, buf_value, .. } = self.bus {
            device.delete_program(program);
            device.delete_custom_vao(vao);
            device.delete_vbo(buf_position);
            device.delete_vbo(buf_value);
        }
    }

    pub fn get_height(&self) -> i32 {
        self.texture.as_ref().map_or(0, |t| t.get_dimensions().height)
    }

    #[cfg(feature = "capture")]
    pub fn get_texture(&self) -> &Texture {
        self.texture.as_ref().unwrap()
    }

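    /// Prepares for a frame's updates: grows the texture to `max_height`
    /// if needed and, for the scatter bus, resets the staged count and
    /// ensures the VBOs can hold `total_block_count` entries.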
    fn prepare_for_updates(
        &mut self,
        device: &mut Device,
        total_block_count: usize,
        max_height: i32,
    ) {
        self.ensure_texture(device, max_height);
        match self.bus {
            GpuCacheBus::PixelBuffer { .. } => {},
            GpuCacheBus::Scatter {
                ref mut buf_position,
                ref mut buf_value,
                ref mut count,
                ..
            } => {
                *count = 0;
                if total_block_count > buf_value.allocated_count() {
                    device.allocate_vbo(buf_position, total_block_count, super::ONE_TIME_USAGE_HINT);
                    device.allocate_vbo(buf_value, total_block_count, super::ONE_TIME_USAGE_HINT);
                }
            }
        }
    }

    pub fn invalidate(&mut self) {
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                info!("Invalidating GPU caches");
                for row in rows {
                    row.add_dirty(0, super::MAX_VERTEX_TEXTURE_WIDTH);
                }
            }
            GpuCacheBus::Scatter { .. } => {
                warn!("Unable to invalidate scattered GPU cache");
            }
        }
    }

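    /// Applies an update list: the pixel-buffer bus writes the blocks into
    /// the CPU mirror and marks them dirty, while the scatter bus stages
    /// the block values and their target positions into the VBOs.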
    fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) {
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                for update in &updates.updates {
                    match *update {
                        GpuCacheUpdate::Copy {
                            block_index,
                            block_count,
                            address,
                        } => {
                            let row = address.v as usize;

                            // Grow the CPU mirror to cover this row.
                            while rows.len() <= row {
                                rows.push(CacheRow::new());
                            }

                            // Write the updated blocks into the CPU mirror
                            // of this row.
                            let block_offset = address.u as usize;
                            let data = &mut rows[row].cpu_blocks;
                            for i in 0 .. block_count {
                                data[block_offset + i] = updates.blocks[block_index + i];
                            }

                            rows[row].add_dirty(block_offset, block_count);
                        }
                    }
                }
            }
            GpuCacheBus::Scatter {
                ref buf_position,
                ref buf_value,
                ref mut count,
                ..
            } => {
                let mut position_data = vec![[!0u16; 2]; updates.blocks.len()];
                let size = self.texture.as_ref().unwrap().get_dimensions().to_usize();

                for update in &updates.updates {
                    match *update {
                        GpuCacheUpdate::Copy {
                            block_index,
                            block_count,
                            address,
                        } => {
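                            // Map texel centers to 0.16 fixed point:
                            // (2n + 1) / (2 * extent), scaled by 2^16,
                            // i.e. ((2n + 1) << 15) / extent. For example,
                            // v = 3 in a texture 8 rows tall gives
                            // (7 << 15) / 8 = 28672 = 0.4375, the center
                            // of the fourth row.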
                            let y = ((2 * address.v as usize + 1) << 15) / size.height;
                            for i in 0 .. block_count {
                                let x = ((2 * address.u as usize + 2 * i + 1) << 15) / size.width;
                                position_data[block_index + i] = [x as _, y as _];
                            }
                        }
                    }
                }

                device.fill_vbo(buf_value, &updates.blocks, *count);
                device.fill_vbo(buf_position, &position_data, *count);
                *count += position_data.len();
            }
        }
    }

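    /// Flushes staged updates to the texture. The pixel-buffer bus uploads
    /// the dirty span of each dirty row; the scatter bus issues the point
    /// draw. Returns the number of rows uploaded (always 0 for scatter).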
    fn flush(&mut self, device: &mut Device, pbo_pool: &mut UploadPBOPool) -> usize {
        let texture = self.texture.as_ref().unwrap();
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows } => {
                let rows_dirty = rows
                    .iter()
                    .filter(|row| row.is_dirty())
                    .count();
                if rows_dirty == 0 {
                    return 0;
                }

                let mut uploader = device.upload_texture(pbo_pool);

                for (row_index, row) in rows.iter_mut().enumerate() {
                    if !row.is_dirty() {
                        continue;
                    }

                    // Upload only the dirty span of this row.
                    let blocks = row.dirty_blocks();
                    let rect = DeviceIntRect::from_origin_and_size(
                        DeviceIntPoint::new(row.min_dirty as i32, row_index as i32),
                        DeviceIntSize::new(blocks.len() as i32, 1),
                    );

                    uploader.upload(device, texture, rect, None, None, blocks.as_ptr(), blocks.len());

                    row.clear_dirty();
                }

                uploader.flush(device);

                rows_dirty
            }
            GpuCacheBus::Scatter { ref program, ref vao, count, .. } => {
                // Scatter the staged blocks into the texture by drawing
                // one point per block at its precomputed position.
                device.disable_depth();
                device.set_blend(false);
                device.bind_program(program);
                device.bind_custom_vao(vao);
                device.bind_draw_target(
                    DrawTarget::from_texture(
                        texture,
                        false,
                    ),
                );
                device.draw_nonindexed_points(0, count as _);
                0
            }
        }
    }

    #[cfg(feature = "replay")]
    pub fn remove_texture(&mut self, device: &mut Device) {
        if let Some(t) = self.texture.take() {
            device.delete_texture(t);
        }
    }

    #[cfg(feature = "replay")]
    pub fn load_from_data(&mut self, texture: Texture, data: Vec<u8>) {
        assert!(self.texture.is_none());
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                let dim = texture.get_dimensions();
                let blocks = unsafe {
                    std::slice::from_raw_parts(
                        data.as_ptr() as *const GpuBlockData,
                        data.len() / mem::size_of::<GpuBlockData>(),
                    )
                };
                // Rebuild the CPU mirror, one `CacheRow` per texture row.
                rows.clear();
                rows.extend((0 .. dim.height).map(|_| CacheRow::new()));
                let chunks = blocks.chunks(super::MAX_VERTEX_TEXTURE_WIDTH);
                debug_assert_eq!(chunks.len(), rows.len());
                for (row, chunk) in rows.iter_mut().zip(chunks) {
                    row.cpu_blocks.copy_from_slice(chunk);
                }
            }
            GpuCacheBus::Scatter { .. } => {}
        }
        self.texture = Some(texture);
    }

    pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) {
        if let GpuCacheBus::PixelBuffer { ref rows, .. } = self.bus {
            for row in rows.iter() {
                report.gpu_cache_cpu_mirror += unsafe { (size_op_funs.size_of_op)(row.cpu_blocks.as_ptr() as *const _) };
            }
        }

        report.gpu_cache_textures +=
            self.texture.as_ref().map_or(0, |t| t.size_in_bytes());
    }
}


impl super::Renderer {
    pub fn update_gpu_cache(&mut self) {
        let _gm = self.gpu_profiler.start_marker("gpu cache update");

        // For an artificial stress test of GPU cache resizing, always
        // push an extra update list so the texture is touched each frame.
        let gpu_cache_height = self.gpu_cache_texture.get_height();
        if gpu_cache_height != 0 && GPU_CACHE_RESIZE_TEST {
            self.pending_gpu_cache_updates.push(GpuCacheUpdateList {
                frame_id: FrameId::INVALID,
                clear: false,
                height: gpu_cache_height,
                blocks: vec![[1f32; 4].into()],
                updates: Vec::new(),
                debug_commands: Vec::new(),
            });
        }

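        // Accumulate the total number of updated blocks and the tallest
        // requested texture height across all pending update lists.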
        let (updated_blocks, max_requested_height) = self
            .pending_gpu_cache_updates
            .iter()
            .fold((0, gpu_cache_height), |(count, height), list| {
                (count + list.blocks.len(), cmp::max(height, list.height))
            });

        if max_requested_height > self.get_max_texture_size() && !self.gpu_cache_overflow {
            self.gpu_cache_overflow = true;
            self.renderer_errors.push(super::RendererError::MaxTextureSize);
        }

        // Ensure the texture is large enough, and stage the scatter
        // buffers if that bus is in use.
        self.gpu_cache_texture.prepare_for_updates(
            &mut self.device,
            updated_blocks,
            max_requested_height,
        );

        for update_list in self.pending_gpu_cache_updates.drain(..) {
            assert!(update_list.height <= max_requested_height);
            if update_list.frame_id > self.gpu_cache_frame_id {
                self.gpu_cache_frame_id = update_list.frame_id;
            }
            self.gpu_cache_texture
                .update(&mut self.device, &update_list);
        }

        self.profile.start_time(profiler::GPU_CACHE_UPLOAD_TIME);
        let updated_rows = self.gpu_cache_texture.flush(
            &mut self.device,
            &mut self.texture_upload_pbo_pool
        );
        self.gpu_cache_upload_time += self.profile.end_time(profiler::GPU_CACHE_UPLOAD_TIME);

        self.profile.set(profiler::GPU_CACHE_ROWS_UPDATED, updated_rows);
        self.profile.set(profiler::GPU_CACHE_BLOCKS_UPDATED, updated_blocks);
    }

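    /// Recreates the cache texture if a clear is pending, applies any
    /// deferred resolves, flushes pending GPU cache updates, and binds
    /// the cache texture for sampling.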
    pub fn prepare_gpu_cache(
        &mut self,
        deferred_resolves: &[DeferredResolve],
    ) -> Result<(), super::RendererError> {
        if self.pending_gpu_cache_clear {
            // Recreate the cache texture from scratch, preserving the
            // bus kind of the one being thrown away.
            let use_scatter =
                matches!(self.gpu_cache_texture.bus, GpuCacheBus::Scatter { .. });
            let new_cache = GpuCacheTexture::new(&mut self.device, use_scatter)?;
            let old_cache = mem::replace(&mut self.gpu_cache_texture, new_cache);
            old_cache.deinit(&mut self.device);
            self.pending_gpu_cache_clear = false;
        }

        let deferred_update_list = self.update_deferred_resolves(deferred_resolves);
        self.pending_gpu_cache_updates.extend(deferred_update_list);

        self.update_gpu_cache();

        // Note: the texture may have been reallocated during the update
        // above, so it must be (re)bound here.
        self.device.bind_texture(
            super::TextureSampler::GpuCache,
            self.gpu_cache_texture.texture.as_ref().unwrap(),
            Swizzle::default(),
        );

        Ok(())
    }

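    /// Reads the entire GPU cache texture back into a byte buffer, along
    /// with its dimensions. Each texel is RGBAF32, i.e. 16 bytes.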
    pub fn read_gpu_cache(&mut self) -> (DeviceIntSize, Vec<u8>) {
        let texture = self.gpu_cache_texture.texture.as_ref().unwrap();
        let size = device_size_as_framebuffer_size(texture.get_dimensions());
        // 16 bytes per texel (RGBAF32: four 4-byte floats).
        let mut texels = vec![0; (size.width * size.height * 16) as usize];
        self.device.begin_frame();
        self.device.bind_read_target(ReadTarget::from_texture(texture));
        self.device.read_pixels_into(
            size.into(),
            api::ImageFormat::RGBAF32,
            &mut texels,
        );
        self.device.reset_read_target();
        self.device.end_frame();
        (texture.get_dimensions(), texels)
    }
}