1#![allow(clippy::cast_ptr_alignment)]
2
3use crate::video::decode::Decoder;
9use crate::video::pixel::{CastFromPrimitive, Pixel};
10use crate::video::VideoMetric;
11use crate::MetricsError;
12use std::f64;
13use std::mem::size_of;
14
15mod rgbtolab;
16use rgbtolab::*;
17
18mod delta_e;
19use delta_e::*;
20
21#[inline]
29pub fn calculate_video_ciede<D: Decoder, F: Fn(usize) + Send>(
30 decoder1: &mut D,
31 decoder2: &mut D,
32 frame_limit: Option<usize>,
33 progress_callback: F,
34) -> Result<f64, Box<dyn Error>> {
35 Ciede2000::default().process_video(decoder1, decoder2, frame_limit, progress_callback)
36}
37
38#[inline]
43#[doc(hidden)]
44pub fn calculate_video_ciede_nosimd<D: Decoder, F: Fn(usize) + Send>(
45 decoder1: &mut D,
46 decoder2: &mut D,
47 frame_limit: Option<usize>,
48 progress_callback: F,
49) -> Result<f64, Box<dyn Error>> {
50 (Ciede2000 { use_simd: false }).process_video(
51 decoder1,
52 decoder2,
53 frame_limit,
54 progress_callback,
55 )
56}
57
58#[inline]
60pub fn calculate_frame_ciede<T: Pixel>(
61 frame1: &Frame<T>,
62 frame2: &Frame<T>,
63 bit_depth: usize,
64 chroma_sampling: ChromaSampling,
65) -> Result<f64, Box<dyn Error>> {
66 Ciede2000::default().process_frame(frame1, frame2, bit_depth, chroma_sampling)
67}
68
69#[inline]
74#[doc(hidden)]
75pub fn calculate_frame_ciede_nosimd<T: Pixel>(
76 frame1: &Frame<T>,
77 frame2: &Frame<T>,
78 bit_depth: usize,
79 chroma_sampling: ChromaSampling,
80) -> Result<f64, Box<dyn Error>> {
81 (Ciede2000 { use_simd: false }).process_frame(frame1, frame2, bit_depth, chroma_sampling)
82}
83
84struct Ciede2000 {
85 use_simd: bool,
86}
87
88impl Default for Ciede2000 {
89 fn default() -> Self {
90 Ciede2000 { use_simd: true }
91 }
92}
93
94use rayon::prelude::*;
95use v_frame::frame::Frame;
96use v_frame::prelude::ChromaSampling;
97
98impl VideoMetric for Ciede2000 {
99 type FrameResult = f64;
100 type VideoResult = f64;
101
102 fn process_frame<T: Pixel>(
103 &self,
104 frame1: &Frame<T>,
105 frame2: &Frame<T>,
106 bit_depth: usize,
107 chroma_sampling: ChromaSampling,
108 ) -> Result<Self::FrameResult, Box<dyn Error>> {
109 if (size_of::<T>() == 1 && bit_depth > 8) || (size_of::<T>() == 2 && bit_depth <= 8) {
110 return Err(Box::new(MetricsError::InputMismatch {
111 reason: "Bit depths does not match pixel width",
112 }));
113 }
114
115 frame1.can_compare(frame2)?;
116
117 let dec = chroma_sampling.get_decimation().unwrap_or((1, 1));
118 let y_width = frame1.planes[0].cfg.width;
119 let y_height = frame1.planes[0].cfg.height;
120 let c_width = frame1.planes[1].cfg.width;
121 let delta_e_row_fn = get_delta_e_row_fn(bit_depth, dec.0, self.use_simd);
122 let delta_e_per_line = (0..y_height).into_par_iter().map(|i| {
125 let y_start = i * y_width;
126 let y_end = y_start + y_width;
127 let c_start = (i >> dec.1) * c_width;
128 let c_end = c_start + c_width;
129
130 let y_range = y_start..y_end;
131 let c_range = c_start..c_end;
132
133 let mut delta_e_vec = vec![0.0; y_end - y_start];
134
135 unsafe {
136 delta_e_row_fn(
137 FrameRow {
138 y: &frame1.planes[0].data[y_range.clone()],
139 u: &frame1.planes[1].data[c_range.clone()],
140 v: &frame1.planes[2].data[c_range.clone()],
141 },
142 FrameRow {
143 y: &frame2.planes[0].data[y_range],
144 u: &frame2.planes[1].data[c_range.clone()],
145 v: &frame2.planes[2].data[c_range],
146 },
147 &mut delta_e_vec[..],
148 );
149 }
150
151 delta_e_vec.iter().map(|x| *x as f64).sum::<f64>()
152 });
153
154 let score =
155 45. - 20. * (delta_e_per_line.sum::<f64>() / ((y_width * y_height) as f64)).log10();
156 Ok(score.min(100.))
157 }
158
159 fn aggregate_frame_results(
160 &self,
161 metrics: &[Self::FrameResult],
162 ) -> Result<Self::VideoResult, Box<dyn Error>> {
163 Ok(metrics.iter().copied().sum::<f64>() / metrics.len() as f64)
164 }
165}
166
167const K_SUB: KSubArgs = KSubArgs {
172 l: 0.65,
173 c: 1.0,
174 h: 4.0,
175};
176
177pub(crate) struct FrameRow<'a, T: Pixel> {
178 y: &'a [T],
179 u: &'a [T],
180 v: &'a [T],
181}
182
183type DeltaERowFn<T> = unsafe fn(FrameRow<T>, FrameRow<T>, &mut [f32]);
184
185fn get_delta_e_row_fn<T: Pixel>(bit_depth: usize, xdec: usize, simd: bool) -> DeltaERowFn<T> {
186 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
187 {
188 if is_x86_feature_detected!("avx2") && xdec == 1 && simd {
189 return match bit_depth {
190 8 => BD8::delta_e_row_avx2,
191 10 => BD10::delta_e_row_avx2,
192 12 => BD12::delta_e_row_avx2,
193 _ => unreachable!(),
194 };
195 }
196 }
197 match (bit_depth, xdec) {
198 (8, 1) => BD8::delta_e_row_scalar,
199 (10, 1) => BD10::delta_e_row_scalar,
200 (12, 1) => BD12::delta_e_row_scalar,
201 (8, 0) => BD8_444::delta_e_row_scalar,
202 (10, 0) => BD10_444::delta_e_row_scalar,
203 (12, 0) => BD12_444::delta_e_row_scalar,
204 _ => unreachable!(),
205 }
206}
207
208pub(crate) trait Colorspace {
209 const BIT_DEPTH: u32;
210 const X_DECIMATION: u32;
211}
212
213struct BD8;
214struct BD10;
215struct BD12;
216
217struct BD8_444;
218struct BD10_444;
219struct BD12_444;
220
221impl Colorspace for BD8 {
222 const BIT_DEPTH: u32 = 8;
223 const X_DECIMATION: u32 = 1;
224}
225impl Colorspace for BD10 {
226 const BIT_DEPTH: u32 = 10;
227 const X_DECIMATION: u32 = 1;
228}
229impl Colorspace for BD12 {
230 const BIT_DEPTH: u32 = 12;
231 const X_DECIMATION: u32 = 1;
232}
233impl Colorspace for BD8_444 {
234 const BIT_DEPTH: u32 = 8;
235 const X_DECIMATION: u32 = 0;
236}
237impl Colorspace for BD10_444 {
238 const BIT_DEPTH: u32 = 10;
239 const X_DECIMATION: u32 = 0;
240}
241impl Colorspace for BD12_444 {
242 const BIT_DEPTH: u32 = 12;
243 const X_DECIMATION: u32 = 0;
244}
245
246fn twice<T>(
247 i: T,
248) -> itertools::Interleave<<T as IntoIterator>::IntoIter, <T as IntoIterator>::IntoIter>
249where
250 T: IntoIterator + Clone,
251{
252 itertools::interleave(i.clone(), i)
253}
254
255pub(crate) trait DeltaEScalar: Colorspace {
256 fn delta_e_scalar(yuv1: (u16, u16, u16), yuv2: (u16, u16, u16)) -> f32 {
257 let scale = (1 << (Self::BIT_DEPTH - 8)) as f32;
258 let yuv_to_rgb = |yuv: (u16, u16, u16)| {
259 let y = (yuv.0 as f32 - 16. * scale) * (1. / (219. * scale));
261 let u = (yuv.1 as f32 - 128. * scale) * (1. / (224. * scale));
262 let v = (yuv.2 as f32 - 128. * scale) * (1. / (224. * scale));
263
264 let r = y + 1.28033 * v;
266 let g = y - 0.21482 * u - 0.38059 * v;
268 let b = y + 2.12798 * u;
270
271 (r, g, b)
272 };
273
274 let (r1, g1, b1) = yuv_to_rgb(yuv1);
275 let (r2, g2, b2) = yuv_to_rgb(yuv2);
276 DE2000::new(rgb_to_lab(&[r1, g1, b1]), rgb_to_lab(&[r2, g2, b2]), K_SUB)
277 }
278
279 unsafe fn delta_e_row_scalar<T: Pixel>(
280 row1: FrameRow<T>,
281 row2: FrameRow<T>,
282 res_row: &mut [f32],
283 ) {
284 if Self::X_DECIMATION == 1 {
285 for (y1, u1, v1, y2, u2, v2, res) in izip!(
286 row1.y,
287 twice(row1.u),
288 twice(row1.v),
289 row2.y,
290 twice(row2.u),
291 twice(row2.v),
292 res_row
293 ) {
294 *res = Self::delta_e_scalar(
295 (
296 u16::cast_from(*y1),
297 u16::cast_from(*u1),
298 u16::cast_from(*v1),
299 ),
300 (
301 u16::cast_from(*y2),
302 u16::cast_from(*u2),
303 u16::cast_from(*v2),
304 ),
305 );
306 }
307 } else {
308 for (y1, u1, v1, y2, u2, v2, res) in
309 izip!(row1.y, row1.u, row1.v, row2.y, row2.u, row2.v, res_row)
310 {
311 *res = Self::delta_e_scalar(
312 (
313 u16::cast_from(*y1),
314 u16::cast_from(*u1),
315 u16::cast_from(*v1),
316 ),
317 (
318 u16::cast_from(*y2),
319 u16::cast_from(*u2),
320 u16::cast_from(*v2),
321 ),
322 );
323 }
324 }
325 }
326}
327
328impl DeltaEScalar for BD8 {}
329impl DeltaEScalar for BD10 {}
330impl DeltaEScalar for BD12 {}
331impl DeltaEScalar for BD8_444 {}
332impl DeltaEScalar for BD10_444 {}
333impl DeltaEScalar for BD12_444 {}
334
335#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
336use self::avx2::*;
337use std::error::Error;
338
339use super::FrameCompare;
340
341#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
342mod avx2 {
343 use super::*;
344
345 #[cfg(target_arch = "x86")]
346 use std::arch::x86::*;
347 #[cfg(target_arch = "x86_64")]
348 use std::arch::x86_64::*;
349
350 pub(crate) trait DeltaEAVX2: Colorspace + DeltaEScalar {
351 #[target_feature(enable = "avx2")]
352 unsafe fn yuv_to_rgb(yuv: (__m256, __m256, __m256)) -> (__m256, __m256, __m256) {
353 let scale: f32 = (1 << (Self::BIT_DEPTH - 8)) as f32;
354 #[target_feature(enable = "avx2")]
355 unsafe fn set1(val: f32) -> __m256 {
356 _mm256_set1_ps(val)
357 }
358 let y = _mm256_mul_ps(
359 _mm256_sub_ps(yuv.0, set1(16. * scale)),
360 set1(1. / (219. * scale)),
361 );
362 let u = _mm256_mul_ps(
363 _mm256_sub_ps(yuv.1, set1(128. * scale)),
364 set1(1. / (224. * scale)),
365 );
366 let v = _mm256_mul_ps(
367 _mm256_sub_ps(yuv.2, set1(128. * scale)),
368 set1(1. / (224. * scale)),
369 );
370
371 let r = _mm256_add_ps(y, _mm256_mul_ps(v, set1(1.28033)));
372 let g = _mm256_add_ps(
373 _mm256_add_ps(y, _mm256_mul_ps(u, set1(-0.21482))),
374 _mm256_mul_ps(v, set1(-0.38059)),
375 );
376 let b = _mm256_add_ps(y, _mm256_mul_ps(u, set1(2.12798)));
377
378 (r, g, b)
379 }
380
381 #[target_feature(enable = "avx2")]
382 unsafe fn delta_e_avx2(
383 yuv1: (__m256, __m256, __m256),
384 yuv2: (__m256, __m256, __m256),
385 res_chunk: &mut [f32],
386 ) {
387 let (r1, g1, b1) = Self::yuv_to_rgb(yuv1);
388 let (r2, g2, b2) = Self::yuv_to_rgb(yuv2);
389
390 let lab1 = rgb_to_lab_avx2(&[r1, g1, b1]);
391 let lab2 = rgb_to_lab_avx2(&[r2, g2, b2]);
392 for i in 0..8 {
393 res_chunk[i] = DE2000::new(lab1[i], lab2[i], K_SUB);
394 }
395 }
396
397 #[target_feature(enable = "avx2")]
398 unsafe fn delta_e_row_avx2<T: Pixel>(
399 row1: FrameRow<T>,
400 row2: FrameRow<T>,
401 res_row: &mut [f32],
402 ) {
403 if Self::BIT_DEPTH == 8 {
405 for (chunk1_y, chunk1_u, chunk1_v, chunk2_y, chunk2_u, chunk2_v, res_chunk) in izip!(
406 row1.y.chunks(8),
407 row1.u.chunks(4),
408 row1.v.chunks(4),
409 row2.y.chunks(8),
410 row2.u.chunks(4),
411 row2.v.chunks(4),
412 res_row.chunks_mut(8)
413 ) {
414 if chunk1_y.len() == 8 {
415 #[inline(always)]
416 unsafe fn load_luma(chunk: &[u8]) -> __m256 {
417 let tmp = _mm_loadl_epi64(chunk.as_ptr() as *const _);
418 _mm256_cvtepi32_ps(_mm256_cvtepu8_epi32(tmp))
419 }
420
421 #[inline(always)]
422 unsafe fn load_chroma(chunk: &[u8]) -> __m256 {
423 let tmp = _mm_cvtsi32_si128(*(chunk.as_ptr() as *const i32));
424 _mm256_cvtepi32_ps(_mm256_cvtepu8_epi32(_mm_unpacklo_epi8(tmp, tmp)))
425 }
426
427 Self::delta_e_avx2(
428 (
429 load_luma(
430 &chunk1_y
431 .iter()
432 .map(|p| u8::cast_from(*p))
433 .collect::<Vec<_>>(),
434 ),
435 load_chroma(
436 &chunk1_u
437 .iter()
438 .map(|p| u8::cast_from(*p))
439 .collect::<Vec<_>>(),
440 ),
441 load_chroma(
442 &chunk1_v
443 .iter()
444 .map(|p| u8::cast_from(*p))
445 .collect::<Vec<_>>(),
446 ),
447 ),
448 (
449 load_luma(
450 &chunk2_y
451 .iter()
452 .map(|p| u8::cast_from(*p))
453 .collect::<Vec<_>>(),
454 ),
455 load_chroma(
456 &chunk2_u
457 .iter()
458 .map(|p| u8::cast_from(*p))
459 .collect::<Vec<_>>(),
460 ),
461 load_chroma(
462 &chunk2_v
463 .iter()
464 .map(|p| u8::cast_from(*p))
465 .collect::<Vec<_>>(),
466 ),
467 ),
468 res_chunk,
469 );
470 } else {
471 Self::delta_e_row_scalar(
472 FrameRow {
473 y: chunk1_y,
474 u: chunk1_u,
475 v: chunk1_v,
476 },
477 FrameRow {
478 y: chunk2_y,
479 u: chunk2_u,
480 v: chunk2_v,
481 },
482 res_chunk,
483 );
484 }
485 }
486 } else {
487 for (chunk1_y, chunk1_u, chunk1_v, chunk2_y, chunk2_u, chunk2_v, res_chunk) in izip!(
488 row1.y.chunks(8),
489 row1.u.chunks(4),
490 row1.v.chunks(4),
491 row2.y.chunks(8),
492 row2.u.chunks(4),
493 row2.v.chunks(4),
494 res_row.chunks_mut(8)
495 ) {
496 if chunk1_y.len() == 8 {
497 #[inline(always)]
498 unsafe fn load_luma(chunk: &[u16]) -> __m256 {
499 let tmp = _mm_loadu_si128(chunk.as_ptr() as *const _);
500 _mm256_cvtepi32_ps(_mm256_cvtepu16_epi32(tmp))
501 }
502
503 #[inline(always)]
504 unsafe fn load_chroma(chunk: &[u16]) -> __m256 {
505 let tmp = _mm_loadl_epi64(chunk.as_ptr() as *const _);
506 _mm256_cvtepi32_ps(_mm256_cvtepu16_epi32(_mm_unpacklo_epi16(tmp, tmp)))
507 }
508
509 Self::delta_e_avx2(
510 (
511 load_luma(
512 &chunk1_y
513 .iter()
514 .map(|p| u16::cast_from(*p))
515 .collect::<Vec<_>>(),
516 ),
517 load_chroma(
518 &chunk1_u
519 .iter()
520 .map(|p| u16::cast_from(*p))
521 .collect::<Vec<_>>(),
522 ),
523 load_chroma(
524 &chunk1_v
525 .iter()
526 .map(|p| u16::cast_from(*p))
527 .collect::<Vec<_>>(),
528 ),
529 ),
530 (
531 load_luma(
532 &chunk2_y
533 .iter()
534 .map(|p| u16::cast_from(*p))
535 .collect::<Vec<_>>(),
536 ),
537 load_chroma(
538 &chunk2_u
539 .iter()
540 .map(|p| u16::cast_from(*p))
541 .collect::<Vec<_>>(),
542 ),
543 load_chroma(
544 &chunk2_v
545 .iter()
546 .map(|p| u16::cast_from(*p))
547 .collect::<Vec<_>>(),
548 ),
549 ),
550 res_chunk,
551 );
552 } else {
553 Self::delta_e_row_scalar(
554 FrameRow {
555 y: chunk1_y,
556 u: chunk1_u,
557 v: chunk1_v,
558 },
559 FrameRow {
560 y: chunk2_y,
561 u: chunk2_u,
562 v: chunk2_v,
563 },
564 res_chunk,
565 );
566 }
567 }
568 }
569 }
570 }
571
572 impl DeltaEAVX2 for BD8 {}
573 impl DeltaEAVX2 for BD10 {}
574 impl DeltaEAVX2 for BD12 {}
575}