#![allow(clippy::cast_ptr_alignment)]
use crate::video::decode::Decoder;
use crate::video::pixel::{CastFromPrimitive, Pixel};
use crate::video::VideoMetric;
use crate::MetricsError;
use std::f64;
use std::mem::size_of;
mod rgbtolab;
use rgbtolab::*;
mod delta_e;
use delta_e::*;
#[inline]
pub fn calculate_video_ciede<D: Decoder, F: Fn(usize) + Send>(
decoder1: &mut D,
decoder2: &mut D,
frame_limit: Option<usize>,
progress_callback: F,
) -> Result<f64, Box<dyn Error>> {
Ciede2000::default().process_video(decoder1, decoder2, frame_limit, progress_callback)
}
#[inline]
#[doc(hidden)]
pub fn calculate_video_ciede_nosimd<D: Decoder, F: Fn(usize) + Send>(
decoder1: &mut D,
decoder2: &mut D,
frame_limit: Option<usize>,
progress_callback: F,
) -> Result<f64, Box<dyn Error>> {
(Ciede2000 { use_simd: false }).process_video(
decoder1,
decoder2,
frame_limit,
progress_callback,
)
}
#[inline]
pub fn calculate_frame_ciede<T: Pixel>(
frame1: &Frame<T>,
frame2: &Frame<T>,
bit_depth: usize,
chroma_sampling: ChromaSampling,
) -> Result<f64, Box<dyn Error>> {
Ciede2000::default().process_frame(frame1, frame2, bit_depth, chroma_sampling)
}
#[inline]
#[doc(hidden)]
pub fn calculate_frame_ciede_nosimd<T: Pixel>(
frame1: &Frame<T>,
frame2: &Frame<T>,
bit_depth: usize,
chroma_sampling: ChromaSampling,
) -> Result<f64, Box<dyn Error>> {
(Ciede2000 { use_simd: false }).process_frame(frame1, frame2, bit_depth, chroma_sampling)
}
struct Ciede2000 {
use_simd: bool,
}
impl Default for Ciede2000 {
fn default() -> Self {
Ciede2000 { use_simd: true }
}
}
use rayon::prelude::*;
use v_frame::frame::Frame;
use v_frame::prelude::ChromaSampling;
impl VideoMetric for Ciede2000 {
type FrameResult = f64;
type VideoResult = f64;
fn process_frame<T: Pixel>(
&self,
frame1: &Frame<T>,
frame2: &Frame<T>,
bit_depth: usize,
chroma_sampling: ChromaSampling,
) -> Result<Self::FrameResult, Box<dyn Error>> {
if (size_of::<T>() == 1 && bit_depth > 8) || (size_of::<T>() == 2 && bit_depth <= 8) {
return Err(Box::new(MetricsError::InputMismatch {
reason: "Bit depths does not match pixel width",
}));
}
frame1.can_compare(frame2)?;
let dec = chroma_sampling.get_decimation().unwrap_or((1, 1));
let y_width = frame1.planes[0].cfg.width;
let y_height = frame1.planes[0].cfg.height;
let c_width = frame1.planes[1].cfg.width;
let delta_e_row_fn = get_delta_e_row_fn(bit_depth, dec.0, self.use_simd);
let delta_e_per_line = (0..y_height).into_par_iter().map(|i| {
let y_start = i * y_width;
let y_end = y_start + y_width;
let c_start = (i >> dec.1) * c_width;
let c_end = c_start + c_width;
let y_range = y_start..y_end;
let c_range = c_start..c_end;
let mut delta_e_vec = vec![0.0; y_end - y_start];
unsafe {
delta_e_row_fn(
FrameRow {
y: &frame1.planes[0].data[y_range.clone()],
u: &frame1.planes[1].data[c_range.clone()],
v: &frame1.planes[2].data[c_range.clone()],
},
FrameRow {
y: &frame2.planes[0].data[y_range],
u: &frame2.planes[1].data[c_range.clone()],
v: &frame2.planes[2].data[c_range],
},
&mut delta_e_vec[..],
);
}
delta_e_vec.iter().map(|x| *x as f64).sum::<f64>()
});
let score =
45. - 20. * (delta_e_per_line.sum::<f64>() / ((y_width * y_height) as f64)).log10();
Ok(score.min(100.))
}
fn aggregate_frame_results(
&self,
metrics: &[Self::FrameResult],
) -> Result<Self::VideoResult, Box<dyn Error>> {
Ok(metrics.iter().copied().sum::<f64>() / metrics.len() as f64)
}
}
const K_SUB: KSubArgs = KSubArgs {
l: 0.65,
c: 1.0,
h: 4.0,
};
pub(crate) struct FrameRow<'a, T: Pixel> {
y: &'a [T],
u: &'a [T],
v: &'a [T],
}
type DeltaERowFn<T> = unsafe fn(FrameRow<T>, FrameRow<T>, &mut [f32]);
fn get_delta_e_row_fn<T: Pixel>(bit_depth: usize, xdec: usize, simd: bool) -> DeltaERowFn<T> {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if is_x86_feature_detected!("avx2") && xdec == 1 && simd {
return match bit_depth {
8 => BD8::delta_e_row_avx2,
10 => BD10::delta_e_row_avx2,
12 => BD12::delta_e_row_avx2,
_ => unreachable!(),
};
}
}
match (bit_depth, xdec) {
(8, 1) => BD8::delta_e_row_scalar,
(10, 1) => BD10::delta_e_row_scalar,
(12, 1) => BD12::delta_e_row_scalar,
(8, 0) => BD8_444::delta_e_row_scalar,
(10, 0) => BD10_444::delta_e_row_scalar,
(12, 0) => BD12_444::delta_e_row_scalar,
_ => unreachable!(),
}
}
pub(crate) trait Colorspace {
const BIT_DEPTH: u32;
const X_DECIMATION: u32;
}
struct BD8;
struct BD10;
struct BD12;
struct BD8_444;
struct BD10_444;
struct BD12_444;
impl Colorspace for BD8 {
const BIT_DEPTH: u32 = 8;
const X_DECIMATION: u32 = 1;
}
impl Colorspace for BD10 {
const BIT_DEPTH: u32 = 10;
const X_DECIMATION: u32 = 1;
}
impl Colorspace for BD12 {
const BIT_DEPTH: u32 = 12;
const X_DECIMATION: u32 = 1;
}
impl Colorspace for BD8_444 {
const BIT_DEPTH: u32 = 8;
const X_DECIMATION: u32 = 0;
}
impl Colorspace for BD10_444 {
const BIT_DEPTH: u32 = 10;
const X_DECIMATION: u32 = 0;
}
impl Colorspace for BD12_444 {
const BIT_DEPTH: u32 = 12;
const X_DECIMATION: u32 = 0;
}
fn twice<T>(
i: T,
) -> itertools::Interleave<<T as IntoIterator>::IntoIter, <T as IntoIterator>::IntoIter>
where
T: IntoIterator + Clone,
{
itertools::interleave(i.clone(), i)
}
pub(crate) trait DeltaEScalar: Colorspace {
fn delta_e_scalar(yuv1: (u16, u16, u16), yuv2: (u16, u16, u16)) -> f32 {
let scale = (1 << (Self::BIT_DEPTH - 8)) as f32;
let yuv_to_rgb = |yuv: (u16, u16, u16)| {
let y = (yuv.0 as f32 - 16. * scale) * (1. / (219. * scale));
let u = (yuv.1 as f32 - 128. * scale) * (1. / (224. * scale));
let v = (yuv.2 as f32 - 128. * scale) * (1. / (224. * scale));
let r = y + 1.28033 * v;
let g = y - 0.21482 * u - 0.38059 * v;
let b = y + 2.12798 * u;
(r, g, b)
};
let (r1, g1, b1) = yuv_to_rgb(yuv1);
let (r2, g2, b2) = yuv_to_rgb(yuv2);
DE2000::new(rgb_to_lab(&[r1, g1, b1]), rgb_to_lab(&[r2, g2, b2]), K_SUB)
}
unsafe fn delta_e_row_scalar<T: Pixel>(
row1: FrameRow<T>,
row2: FrameRow<T>,
res_row: &mut [f32],
) {
if Self::X_DECIMATION == 1 {
for (y1, u1, v1, y2, u2, v2, res) in izip!(
row1.y,
twice(row1.u),
twice(row1.v),
row2.y,
twice(row2.u),
twice(row2.v),
res_row
) {
*res = Self::delta_e_scalar(
(
u16::cast_from(*y1),
u16::cast_from(*u1),
u16::cast_from(*v1),
),
(
u16::cast_from(*y2),
u16::cast_from(*u2),
u16::cast_from(*v2),
),
);
}
} else {
for (y1, u1, v1, y2, u2, v2, res) in
izip!(row1.y, row1.u, row1.v, row2.y, row2.u, row2.v, res_row)
{
*res = Self::delta_e_scalar(
(
u16::cast_from(*y1),
u16::cast_from(*u1),
u16::cast_from(*v1),
),
(
u16::cast_from(*y2),
u16::cast_from(*u2),
u16::cast_from(*v2),
),
);
}
}
}
}
impl DeltaEScalar for BD8 {}
impl DeltaEScalar for BD10 {}
impl DeltaEScalar for BD12 {}
impl DeltaEScalar for BD8_444 {}
impl DeltaEScalar for BD10_444 {}
impl DeltaEScalar for BD12_444 {}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use self::avx2::*;
use std::error::Error;
use super::FrameCompare;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod avx2 {
use super::*;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
pub(crate) trait DeltaEAVX2: Colorspace + DeltaEScalar {
#[target_feature(enable = "avx2")]
unsafe fn yuv_to_rgb(yuv: (__m256, __m256, __m256)) -> (__m256, __m256, __m256) {
let scale: f32 = (1 << (Self::BIT_DEPTH - 8)) as f32;
#[target_feature(enable = "avx2")]
unsafe fn set1(val: f32) -> __m256 {
_mm256_set1_ps(val)
}
let y = _mm256_mul_ps(
_mm256_sub_ps(yuv.0, set1(16. * scale)),
set1(1. / (219. * scale)),
);
let u = _mm256_mul_ps(
_mm256_sub_ps(yuv.1, set1(128. * scale)),
set1(1. / (224. * scale)),
);
let v = _mm256_mul_ps(
_mm256_sub_ps(yuv.2, set1(128. * scale)),
set1(1. / (224. * scale)),
);
let r = _mm256_add_ps(y, _mm256_mul_ps(v, set1(1.28033)));
let g = _mm256_add_ps(
_mm256_add_ps(y, _mm256_mul_ps(u, set1(-0.21482))),
_mm256_mul_ps(v, set1(-0.38059)),
);
let b = _mm256_add_ps(y, _mm256_mul_ps(u, set1(2.12798)));
(r, g, b)
}
#[target_feature(enable = "avx2")]
unsafe fn delta_e_avx2(
yuv1: (__m256, __m256, __m256),
yuv2: (__m256, __m256, __m256),
res_chunk: &mut [f32],
) {
let (r1, g1, b1) = Self::yuv_to_rgb(yuv1);
let (r2, g2, b2) = Self::yuv_to_rgb(yuv2);
let lab1 = rgb_to_lab_avx2(&[r1, g1, b1]);
let lab2 = rgb_to_lab_avx2(&[r2, g2, b2]);
for i in 0..8 {
res_chunk[i] = DE2000::new(lab1[i], lab2[i], K_SUB);
}
}
#[target_feature(enable = "avx2")]
unsafe fn delta_e_row_avx2<T: Pixel>(
row1: FrameRow<T>,
row2: FrameRow<T>,
res_row: &mut [f32],
) {
if Self::BIT_DEPTH == 8 {
for (chunk1_y, chunk1_u, chunk1_v, chunk2_y, chunk2_u, chunk2_v, res_chunk) in izip!(
row1.y.chunks(8),
row1.u.chunks(4),
row1.v.chunks(4),
row2.y.chunks(8),
row2.u.chunks(4),
row2.v.chunks(4),
res_row.chunks_mut(8)
) {
if chunk1_y.len() == 8 {
#[inline(always)]
unsafe fn load_luma(chunk: &[u8]) -> __m256 {
let tmp = _mm_loadl_epi64(chunk.as_ptr() as *const _);
_mm256_cvtepi32_ps(_mm256_cvtepu8_epi32(tmp))
}
#[inline(always)]
unsafe fn load_chroma(chunk: &[u8]) -> __m256 {
let tmp = _mm_cvtsi32_si128(*(chunk.as_ptr() as *const i32));
_mm256_cvtepi32_ps(_mm256_cvtepu8_epi32(_mm_unpacklo_epi8(tmp, tmp)))
}
Self::delta_e_avx2(
(
load_luma(
&chunk1_y
.iter()
.map(|p| u8::cast_from(*p))
.collect::<Vec<_>>(),
),
load_chroma(
&chunk1_u
.iter()
.map(|p| u8::cast_from(*p))
.collect::<Vec<_>>(),
),
load_chroma(
&chunk1_v
.iter()
.map(|p| u8::cast_from(*p))
.collect::<Vec<_>>(),
),
),
(
load_luma(
&chunk2_y
.iter()
.map(|p| u8::cast_from(*p))
.collect::<Vec<_>>(),
),
load_chroma(
&chunk2_u
.iter()
.map(|p| u8::cast_from(*p))
.collect::<Vec<_>>(),
),
load_chroma(
&chunk2_v
.iter()
.map(|p| u8::cast_from(*p))
.collect::<Vec<_>>(),
),
),
res_chunk,
);
} else {
Self::delta_e_row_scalar(
FrameRow {
y: chunk1_y,
u: chunk1_u,
v: chunk1_v,
},
FrameRow {
y: chunk2_y,
u: chunk2_u,
v: chunk2_v,
},
res_chunk,
);
}
}
} else {
for (chunk1_y, chunk1_u, chunk1_v, chunk2_y, chunk2_u, chunk2_v, res_chunk) in izip!(
row1.y.chunks(8),
row1.u.chunks(4),
row1.v.chunks(4),
row2.y.chunks(8),
row2.u.chunks(4),
row2.v.chunks(4),
res_row.chunks_mut(8)
) {
if chunk1_y.len() == 8 {
#[inline(always)]
unsafe fn load_luma(chunk: &[u16]) -> __m256 {
let tmp = _mm_loadu_si128(chunk.as_ptr() as *const _);
_mm256_cvtepi32_ps(_mm256_cvtepu16_epi32(tmp))
}
#[inline(always)]
unsafe fn load_chroma(chunk: &[u16]) -> __m256 {
let tmp = _mm_loadl_epi64(chunk.as_ptr() as *const _);
_mm256_cvtepi32_ps(_mm256_cvtepu16_epi32(_mm_unpacklo_epi16(tmp, tmp)))
}
Self::delta_e_avx2(
(
load_luma(
&chunk1_y
.iter()
.map(|p| u16::cast_from(*p))
.collect::<Vec<_>>(),
),
load_chroma(
&chunk1_u
.iter()
.map(|p| u16::cast_from(*p))
.collect::<Vec<_>>(),
),
load_chroma(
&chunk1_v
.iter()
.map(|p| u16::cast_from(*p))
.collect::<Vec<_>>(),
),
),
(
load_luma(
&chunk2_y
.iter()
.map(|p| u16::cast_from(*p))
.collect::<Vec<_>>(),
),
load_chroma(
&chunk2_u
.iter()
.map(|p| u16::cast_from(*p))
.collect::<Vec<_>>(),
),
load_chroma(
&chunk2_v
.iter()
.map(|p| u16::cast_from(*p))
.collect::<Vec<_>>(),
),
),
res_chunk,
);
} else {
Self::delta_e_row_scalar(
FrameRow {
y: chunk1_y,
u: chunk1_u,
v: chunk1_v,
},
FrameRow {
y: chunk2_y,
u: chunk2_u,
v: chunk2_v,
},
res_chunk,
);
}
}
}
}
}
impl DeltaEAVX2 for BD8 {}
impl DeltaEAVX2 for BD10 {}
impl DeltaEAVX2 for BD12 {}
}