av_metrics/video/ciede/
mod.rs

1#![allow(clippy::cast_ptr_alignment)]
2
3//! The CIEDE2000 color difference formula.
4//!
5//! CIEDE2000 implementation adapted from
6//! [Kyle Siefring's](https://github.com/KyleSiefring/dump_ciede2000).
7
8use crate::video::decode::Decoder;
9use crate::video::pixel::{CastFromPrimitive, Pixel};
10use crate::video::VideoMetric;
11use crate::MetricsError;
12use std::f64;
13use std::mem::size_of;
14
15mod rgbtolab;
16use rgbtolab::*;
17
18mod delta_e;
19use delta_e::*;
20
21/// Calculate the CIEDE2000 metric between two video clips. Higher is better.
22///
23/// This will return at the end of the shorter of the two clips,
24/// comparing any frames up to that point.
25///
26/// Optionally, `frame_limit` can be set to only compare the first
27/// `frame_limit` frames in each video.
28#[inline]
29pub fn calculate_video_ciede<D: Decoder, F: Fn(usize) + Send>(
30    decoder1: &mut D,
31    decoder2: &mut D,
32    frame_limit: Option<usize>,
33    progress_callback: F,
34) -> Result<f64, Box<dyn Error>> {
35    Ciede2000::default().process_video(decoder1, decoder2, frame_limit, progress_callback)
36}
37
38/// Calculate the CIEDE2000 metric between two video clips. Higher is better.
39///
40/// This version disables SIMD. It is intended to only be used
41/// by tests and benchmarks.
42#[inline]
43#[doc(hidden)]
44pub fn calculate_video_ciede_nosimd<D: Decoder, F: Fn(usize) + Send>(
45    decoder1: &mut D,
46    decoder2: &mut D,
47    frame_limit: Option<usize>,
48    progress_callback: F,
49) -> Result<f64, Box<dyn Error>> {
50    (Ciede2000 { use_simd: false }).process_video(
51        decoder1,
52        decoder2,
53        frame_limit,
54        progress_callback,
55    )
56}
57
58/// Calculate the CIEDE2000 metric between two video frames. Higher is better.
59#[inline]
60pub fn calculate_frame_ciede<T: Pixel>(
61    frame1: &Frame<T>,
62    frame2: &Frame<T>,
63    bit_depth: usize,
64    chroma_sampling: ChromaSampling,
65) -> Result<f64, Box<dyn Error>> {
66    Ciede2000::default().process_frame(frame1, frame2, bit_depth, chroma_sampling)
67}
68
69/// Calculate the CIEDE2000 metric between two video frames. Higher is better.
70///
71/// This version disables SIMD. It is intended to only be used
72/// by tests and benchmarks.
73#[inline]
74#[doc(hidden)]
75pub fn calculate_frame_ciede_nosimd<T: Pixel>(
76    frame1: &Frame<T>,
77    frame2: &Frame<T>,
78    bit_depth: usize,
79    chroma_sampling: ChromaSampling,
80) -> Result<f64, Box<dyn Error>> {
81    (Ciede2000 { use_simd: false }).process_frame(frame1, frame2, bit_depth, chroma_sampling)
82}
83
84struct Ciede2000 {
85    use_simd: bool,
86}
87
88impl Default for Ciede2000 {
89    fn default() -> Self {
90        Ciede2000 { use_simd: true }
91    }
92}
93
94use rayon::prelude::*;
95use v_frame::frame::Frame;
96use v_frame::prelude::ChromaSampling;
97
98impl VideoMetric for Ciede2000 {
99    type FrameResult = f64;
100    type VideoResult = f64;
101
102    fn process_frame<T: Pixel>(
103        &self,
104        frame1: &Frame<T>,
105        frame2: &Frame<T>,
106        bit_depth: usize,
107        chroma_sampling: ChromaSampling,
108    ) -> Result<Self::FrameResult, Box<dyn Error>> {
109        if (size_of::<T>() == 1 && bit_depth > 8) || (size_of::<T>() == 2 && bit_depth <= 8) {
110            return Err(Box::new(MetricsError::InputMismatch {
111                reason: "Bit depths does not match pixel width",
112            }));
113        }
114
115        frame1.can_compare(frame2)?;
116
117        let dec = chroma_sampling.get_decimation().unwrap_or((1, 1));
118        let y_width = frame1.planes[0].cfg.width;
119        let y_height = frame1.planes[0].cfg.height;
120        let c_width = frame1.planes[1].cfg.width;
121        let delta_e_row_fn = get_delta_e_row_fn(bit_depth, dec.0, self.use_simd);
122        // let mut delta_e_vec: Vec<f32> = vec![0.0; y_width * y_height];
123
124        let delta_e_per_line = (0..y_height).into_par_iter().map(|i| {
125            let y_start = i * y_width;
126            let y_end = y_start + y_width;
127            let c_start = (i >> dec.1) * c_width;
128            let c_end = c_start + c_width;
129
130            let y_range = y_start..y_end;
131            let c_range = c_start..c_end;
132
133            let mut delta_e_vec = vec![0.0; y_end - y_start];
134
135            unsafe {
136                delta_e_row_fn(
137                    FrameRow {
138                        y: &frame1.planes[0].data[y_range.clone()],
139                        u: &frame1.planes[1].data[c_range.clone()],
140                        v: &frame1.planes[2].data[c_range.clone()],
141                    },
142                    FrameRow {
143                        y: &frame2.planes[0].data[y_range],
144                        u: &frame2.planes[1].data[c_range.clone()],
145                        v: &frame2.planes[2].data[c_range],
146                    },
147                    &mut delta_e_vec[..],
148                );
149            }
150
151            delta_e_vec.iter().map(|x| *x as f64).sum::<f64>()
152        });
153
154        let score =
155            45. - 20. * (delta_e_per_line.sum::<f64>() / ((y_width * y_height) as f64)).log10();
156        Ok(score.min(100.))
157    }
158
159    fn aggregate_frame_results(
160        &self,
161        metrics: &[Self::FrameResult],
162    ) -> Result<Self::VideoResult, Box<dyn Error>> {
163        Ok(metrics.iter().copied().sum::<f64>() / metrics.len() as f64)
164    }
165}
166
167// Arguments for delta e
168// "Color Image Quality Assessment Based on CIEDE2000"
169// Yang Yang, Jun Ming and Nenghai Yu, 2012
170// http://dx.doi.org/10.1155/2012/273723
171const K_SUB: KSubArgs = KSubArgs {
172    l: 0.65,
173    c: 1.0,
174    h: 4.0,
175};
176
177pub(crate) struct FrameRow<'a, T: Pixel> {
178    y: &'a [T],
179    u: &'a [T],
180    v: &'a [T],
181}
182
183type DeltaERowFn<T> = unsafe fn(FrameRow<T>, FrameRow<T>, &mut [f32]);
184
185fn get_delta_e_row_fn<T: Pixel>(bit_depth: usize, xdec: usize, simd: bool) -> DeltaERowFn<T> {
186    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
187    {
188        if is_x86_feature_detected!("avx2") && xdec == 1 && simd {
189            return match bit_depth {
190                8 => BD8::delta_e_row_avx2,
191                10 => BD10::delta_e_row_avx2,
192                12 => BD12::delta_e_row_avx2,
193                _ => unreachable!(),
194            };
195        }
196    }
197    match (bit_depth, xdec) {
198        (8, 1) => BD8::delta_e_row_scalar,
199        (10, 1) => BD10::delta_e_row_scalar,
200        (12, 1) => BD12::delta_e_row_scalar,
201        (8, 0) => BD8_444::delta_e_row_scalar,
202        (10, 0) => BD10_444::delta_e_row_scalar,
203        (12, 0) => BD12_444::delta_e_row_scalar,
204        _ => unreachable!(),
205    }
206}
207
208pub(crate) trait Colorspace {
209    const BIT_DEPTH: u32;
210    const X_DECIMATION: u32;
211}
212
213struct BD8;
214struct BD10;
215struct BD12;
216
217struct BD8_444;
218struct BD10_444;
219struct BD12_444;
220
221impl Colorspace for BD8 {
222    const BIT_DEPTH: u32 = 8;
223    const X_DECIMATION: u32 = 1;
224}
225impl Colorspace for BD10 {
226    const BIT_DEPTH: u32 = 10;
227    const X_DECIMATION: u32 = 1;
228}
229impl Colorspace for BD12 {
230    const BIT_DEPTH: u32 = 12;
231    const X_DECIMATION: u32 = 1;
232}
233impl Colorspace for BD8_444 {
234    const BIT_DEPTH: u32 = 8;
235    const X_DECIMATION: u32 = 0;
236}
237impl Colorspace for BD10_444 {
238    const BIT_DEPTH: u32 = 10;
239    const X_DECIMATION: u32 = 0;
240}
241impl Colorspace for BD12_444 {
242    const BIT_DEPTH: u32 = 12;
243    const X_DECIMATION: u32 = 0;
244}
245
246fn twice<T>(
247    i: T,
248) -> itertools::Interleave<<T as IntoIterator>::IntoIter, <T as IntoIterator>::IntoIter>
249where
250    T: IntoIterator + Clone,
251{
252    itertools::interleave(i.clone(), i)
253}
254
255pub(crate) trait DeltaEScalar: Colorspace {
256    fn delta_e_scalar(yuv1: (u16, u16, u16), yuv2: (u16, u16, u16)) -> f32 {
257        let scale = (1 << (Self::BIT_DEPTH - 8)) as f32;
258        let yuv_to_rgb = |yuv: (u16, u16, u16)| {
259            // Assumes BT.709
260            let y = (yuv.0 as f32 - 16. * scale) * (1. / (219. * scale));
261            let u = (yuv.1 as f32 - 128. * scale) * (1. / (224. * scale));
262            let v = (yuv.2 as f32 - 128. * scale) * (1. / (224. * scale));
263
264            // [-0.804677, 1.81723]
265            let r = y + 1.28033 * v;
266            // [−0.316650, 1.09589]
267            let g = y - 0.21482 * u - 0.38059 * v;
268            // [-1.28905, 2.29781]
269            let b = y + 2.12798 * u;
270
271            (r, g, b)
272        };
273
274        let (r1, g1, b1) = yuv_to_rgb(yuv1);
275        let (r2, g2, b2) = yuv_to_rgb(yuv2);
276        DE2000::new(rgb_to_lab(&[r1, g1, b1]), rgb_to_lab(&[r2, g2, b2]), K_SUB)
277    }
278
279    unsafe fn delta_e_row_scalar<T: Pixel>(
280        row1: FrameRow<T>,
281        row2: FrameRow<T>,
282        res_row: &mut [f32],
283    ) {
284        if Self::X_DECIMATION == 1 {
285            for (y1, u1, v1, y2, u2, v2, res) in izip!(
286                row1.y,
287                twice(row1.u),
288                twice(row1.v),
289                row2.y,
290                twice(row2.u),
291                twice(row2.v),
292                res_row
293            ) {
294                *res = Self::delta_e_scalar(
295                    (
296                        u16::cast_from(*y1),
297                        u16::cast_from(*u1),
298                        u16::cast_from(*v1),
299                    ),
300                    (
301                        u16::cast_from(*y2),
302                        u16::cast_from(*u2),
303                        u16::cast_from(*v2),
304                    ),
305                );
306            }
307        } else {
308            for (y1, u1, v1, y2, u2, v2, res) in
309                izip!(row1.y, row1.u, row1.v, row2.y, row2.u, row2.v, res_row)
310            {
311                *res = Self::delta_e_scalar(
312                    (
313                        u16::cast_from(*y1),
314                        u16::cast_from(*u1),
315                        u16::cast_from(*v1),
316                    ),
317                    (
318                        u16::cast_from(*y2),
319                        u16::cast_from(*u2),
320                        u16::cast_from(*v2),
321                    ),
322                );
323            }
324        }
325    }
326}
327
328impl DeltaEScalar for BD8 {}
329impl DeltaEScalar for BD10 {}
330impl DeltaEScalar for BD12 {}
331impl DeltaEScalar for BD8_444 {}
332impl DeltaEScalar for BD10_444 {}
333impl DeltaEScalar for BD12_444 {}
334
335#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
336use self::avx2::*;
337use std::error::Error;
338
339use super::FrameCompare;
340
341#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
342mod avx2 {
343    use super::*;
344
345    #[cfg(target_arch = "x86")]
346    use std::arch::x86::*;
347    #[cfg(target_arch = "x86_64")]
348    use std::arch::x86_64::*;
349
350    pub(crate) trait DeltaEAVX2: Colorspace + DeltaEScalar {
351        #[target_feature(enable = "avx2")]
352        unsafe fn yuv_to_rgb(yuv: (__m256, __m256, __m256)) -> (__m256, __m256, __m256) {
353            let scale: f32 = (1 << (Self::BIT_DEPTH - 8)) as f32;
354            #[target_feature(enable = "avx2")]
355            unsafe fn set1(val: f32) -> __m256 {
356                _mm256_set1_ps(val)
357            }
358            let y = _mm256_mul_ps(
359                _mm256_sub_ps(yuv.0, set1(16. * scale)),
360                set1(1. / (219. * scale)),
361            );
362            let u = _mm256_mul_ps(
363                _mm256_sub_ps(yuv.1, set1(128. * scale)),
364                set1(1. / (224. * scale)),
365            );
366            let v = _mm256_mul_ps(
367                _mm256_sub_ps(yuv.2, set1(128. * scale)),
368                set1(1. / (224. * scale)),
369            );
370
371            let r = _mm256_add_ps(y, _mm256_mul_ps(v, set1(1.28033)));
372            let g = _mm256_add_ps(
373                _mm256_add_ps(y, _mm256_mul_ps(u, set1(-0.21482))),
374                _mm256_mul_ps(v, set1(-0.38059)),
375            );
376            let b = _mm256_add_ps(y, _mm256_mul_ps(u, set1(2.12798)));
377
378            (r, g, b)
379        }
380
381        #[target_feature(enable = "avx2")]
382        unsafe fn delta_e_avx2(
383            yuv1: (__m256, __m256, __m256),
384            yuv2: (__m256, __m256, __m256),
385            res_chunk: &mut [f32],
386        ) {
387            let (r1, g1, b1) = Self::yuv_to_rgb(yuv1);
388            let (r2, g2, b2) = Self::yuv_to_rgb(yuv2);
389
390            let lab1 = rgb_to_lab_avx2(&[r1, g1, b1]);
391            let lab2 = rgb_to_lab_avx2(&[r2, g2, b2]);
392            for i in 0..8 {
393                res_chunk[i] = DE2000::new(lab1[i], lab2[i], K_SUB);
394            }
395        }
396
397        #[target_feature(enable = "avx2")]
398        unsafe fn delta_e_row_avx2<T: Pixel>(
399            row1: FrameRow<T>,
400            row2: FrameRow<T>,
401            res_row: &mut [f32],
402        ) {
403            // Only one version should be compiled for each trait
404            if Self::BIT_DEPTH == 8 {
405                for (chunk1_y, chunk1_u, chunk1_v, chunk2_y, chunk2_u, chunk2_v, res_chunk) in izip!(
406                    row1.y.chunks(8),
407                    row1.u.chunks(4),
408                    row1.v.chunks(4),
409                    row2.y.chunks(8),
410                    row2.u.chunks(4),
411                    row2.v.chunks(4),
412                    res_row.chunks_mut(8)
413                ) {
414                    if chunk1_y.len() == 8 {
415                        #[inline(always)]
416                        unsafe fn load_luma(chunk: &[u8]) -> __m256 {
417                            let tmp = _mm_loadl_epi64(chunk.as_ptr() as *const _);
418                            _mm256_cvtepi32_ps(_mm256_cvtepu8_epi32(tmp))
419                        }
420
421                        #[inline(always)]
422                        unsafe fn load_chroma(chunk: &[u8]) -> __m256 {
423                            let tmp = _mm_cvtsi32_si128(*(chunk.as_ptr() as *const i32));
424                            _mm256_cvtepi32_ps(_mm256_cvtepu8_epi32(_mm_unpacklo_epi8(tmp, tmp)))
425                        }
426
427                        Self::delta_e_avx2(
428                            (
429                                load_luma(
430                                    &chunk1_y
431                                        .iter()
432                                        .map(|p| u8::cast_from(*p))
433                                        .collect::<Vec<_>>(),
434                                ),
435                                load_chroma(
436                                    &chunk1_u
437                                        .iter()
438                                        .map(|p| u8::cast_from(*p))
439                                        .collect::<Vec<_>>(),
440                                ),
441                                load_chroma(
442                                    &chunk1_v
443                                        .iter()
444                                        .map(|p| u8::cast_from(*p))
445                                        .collect::<Vec<_>>(),
446                                ),
447                            ),
448                            (
449                                load_luma(
450                                    &chunk2_y
451                                        .iter()
452                                        .map(|p| u8::cast_from(*p))
453                                        .collect::<Vec<_>>(),
454                                ),
455                                load_chroma(
456                                    &chunk2_u
457                                        .iter()
458                                        .map(|p| u8::cast_from(*p))
459                                        .collect::<Vec<_>>(),
460                                ),
461                                load_chroma(
462                                    &chunk2_v
463                                        .iter()
464                                        .map(|p| u8::cast_from(*p))
465                                        .collect::<Vec<_>>(),
466                                ),
467                            ),
468                            res_chunk,
469                        );
470                    } else {
471                        Self::delta_e_row_scalar(
472                            FrameRow {
473                                y: chunk1_y,
474                                u: chunk1_u,
475                                v: chunk1_v,
476                            },
477                            FrameRow {
478                                y: chunk2_y,
479                                u: chunk2_u,
480                                v: chunk2_v,
481                            },
482                            res_chunk,
483                        );
484                    }
485                }
486            } else {
487                for (chunk1_y, chunk1_u, chunk1_v, chunk2_y, chunk2_u, chunk2_v, res_chunk) in izip!(
488                    row1.y.chunks(8),
489                    row1.u.chunks(4),
490                    row1.v.chunks(4),
491                    row2.y.chunks(8),
492                    row2.u.chunks(4),
493                    row2.v.chunks(4),
494                    res_row.chunks_mut(8)
495                ) {
496                    if chunk1_y.len() == 8 {
497                        #[inline(always)]
498                        unsafe fn load_luma(chunk: &[u16]) -> __m256 {
499                            let tmp = _mm_loadu_si128(chunk.as_ptr() as *const _);
500                            _mm256_cvtepi32_ps(_mm256_cvtepu16_epi32(tmp))
501                        }
502
503                        #[inline(always)]
504                        unsafe fn load_chroma(chunk: &[u16]) -> __m256 {
505                            let tmp = _mm_loadl_epi64(chunk.as_ptr() as *const _);
506                            _mm256_cvtepi32_ps(_mm256_cvtepu16_epi32(_mm_unpacklo_epi16(tmp, tmp)))
507                        }
508
509                        Self::delta_e_avx2(
510                            (
511                                load_luma(
512                                    &chunk1_y
513                                        .iter()
514                                        .map(|p| u16::cast_from(*p))
515                                        .collect::<Vec<_>>(),
516                                ),
517                                load_chroma(
518                                    &chunk1_u
519                                        .iter()
520                                        .map(|p| u16::cast_from(*p))
521                                        .collect::<Vec<_>>(),
522                                ),
523                                load_chroma(
524                                    &chunk1_v
525                                        .iter()
526                                        .map(|p| u16::cast_from(*p))
527                                        .collect::<Vec<_>>(),
528                                ),
529                            ),
530                            (
531                                load_luma(
532                                    &chunk2_y
533                                        .iter()
534                                        .map(|p| u16::cast_from(*p))
535                                        .collect::<Vec<_>>(),
536                                ),
537                                load_chroma(
538                                    &chunk2_u
539                                        .iter()
540                                        .map(|p| u16::cast_from(*p))
541                                        .collect::<Vec<_>>(),
542                                ),
543                                load_chroma(
544                                    &chunk2_v
545                                        .iter()
546                                        .map(|p| u16::cast_from(*p))
547                                        .collect::<Vec<_>>(),
548                                ),
549                            ),
550                            res_chunk,
551                        );
552                    } else {
553                        Self::delta_e_row_scalar(
554                            FrameRow {
555                                y: chunk1_y,
556                                u: chunk1_u,
557                                v: chunk1_v,
558                            },
559                            FrameRow {
560                                y: chunk2_y,
561                                u: chunk2_u,
562                                v: chunk2_v,
563                            },
564                            res_chunk,
565                        );
566                    }
567                }
568            }
569        }
570    }
571
572    impl DeltaEAVX2 for BD8 {}
573    impl DeltaEAVX2 for BD10 {}
574    impl DeltaEAVX2 for BD12 {}
575}