reed_solomon_simd/
engine.rs

1//! Low-level building blocks for Reed-Solomon encoding/decoding.
2//!
3//! **This is an advanced module which is not needed for [simple usage] or [basic usage].**
4//!
5//! This module is relevant if you want to
6//! - use [`rate`] module and need an [`Engine`] to use with it.
7//! - create your own [`Engine`].
8//! - understand/benchmark/test at low level.
9//!
10//! # Engines
11//!
12//! An [`Engine`] is an implementation of basic low-level algorithms
13//! needed for Reed-Solomon encoding/decoding.
14//!
15//! - [`Naive`]
16//!     - Simple reference implementation.
17//! - [`NoSimd`]
18//!     - Basic optimized engine without SIMD so that it works on all CPUs.
19//! - [`Avx2`]
20//!     - Optimized engine that takes advantage of the x86(-64) AVX2 SIMD instructions.
21//! - [`Ssse3`]
22//!     - Optimized engine that takes advantage of the x86(-64) SSSE3 SIMD instructions.
23//! - [`Neon`]
24//!     - Optimized engine that takes advantage of the `AArch64` Neon SIMD instructions.
25//! - [`DefaultEngine`]
26//!     - Default engine which is used when no specific engine is given.
27//!     - Automatically selects best engine at runtime.
28//!
29//! [simple usage]: crate#simple-usage
30//! [basic usage]: crate#basic-usage
31//! [`ReedSolomonEncoder`]: crate::ReedSolomonEncoder
32//! [`ReedSolomonDecoder`]: crate::ReedSolomonDecoder
33//! [`rate`]: crate::rate
34
35pub(crate) use self::shards::Shards;
36pub(crate) use utils::{fft_skew_end, formal_derivative, ifft_skew_end, xor_within};
37
38pub use self::{
39    engine_default::DefaultEngine, engine_naive::Naive, engine_nosimd::NoSimd, shards::ShardsRefMut,
40};
41
42#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
43pub use self::{engine_avx2::Avx2, engine_ssse3::Ssse3};
44
45#[cfg(target_arch = "aarch64")]
46pub use self::engine_neon::Neon;
47
48mod engine_default;
49mod engine_naive;
50mod engine_nosimd;
51
52#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
53mod engine_avx2;
54#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
55mod engine_ssse3;
56
57#[cfg(target_arch = "aarch64")]
58mod engine_neon;
59
60mod fwht;
61mod shards;
62
63pub mod tables;
64pub mod utils;
65
66// ======================================================================
67// CONST - PUBLIC
68
69/// Size of Galois field element [`GfElement`] in bits.
70pub const GF_BITS: usize = 16;
71
72/// Galois field order, i.e. number of elements.
73pub const GF_ORDER: usize = 65536;
74
75/// `GF_ORDER - 1`
76pub const GF_MODULUS: GfElement = 65535;
77
78/// Galois field polynomial.
79pub const GF_POLYNOMIAL: usize = 0x1002D;
80
81/// TODO
82pub const CANTOR_BASIS: [GfElement; GF_BITS] = [
83    0x0001, 0xACCA, 0x3C0E, 0x163E, 0xC582, 0xED2E, 0x914C, 0x4012, 0x6C98, 0x10D8, 0x6A72, 0xB900,
84    0xFDB8, 0xFB34, 0xFF38, 0x991E,
85];
86
87// ======================================================================
88// TYPE ALIASES - PUBLIC
89
90/// Galois field element.
91pub type GfElement = u16;
92
93// ======================================================================
94// Engine - PUBLIC
95
96/// Trait for compute-intensive low-level algorithms needed
97/// for Reed-Solomon encoding/decoding.
98///
99/// This is the trait you would implement to provide SIMD support
100/// for a CPU architecture not already provided.
101///
102/// [`Naive`] engine is provided for those who want to
103/// study the source code to understand [`Engine`].
104pub trait Engine {
105    // ============================================================
106    // REQUIRED
107
108    /// In-place decimation-in-time FFT (fast Fourier transform).
109    ///
110    /// - FFT is done on chunk `data[pos .. pos + size]`
111    /// - `size` must be `2^n`
112    /// - Before function call `data[pos .. pos + size]` must be valid.
113    /// - After function call
114    ///     - `data[pos .. pos + truncated_size]`
115    ///       contains valid FFT result.
116    ///     - `data[pos + truncated_size .. pos + size]`
117    ///       contains valid FFT result if this contained
118    ///       only `0u8`:s and garbage otherwise.
119    fn fft(
120        &self,
121        data: &mut ShardsRefMut,
122        pos: usize,
123        size: usize,
124        truncated_size: usize,
125        skew_delta: usize,
126    );
127
128    /// In-place decimation-in-time IFFT (inverse fast Fourier transform).
129    ///
130    /// - IFFT is done on chunk `data[pos .. pos + size]`
131    /// - `size` must be `2^n`
132    /// - Before function call `data[pos .. pos + size]` must be valid.
133    /// - After function call
134    ///     - `data[pos .. pos + truncated_size]`
135    ///       contains valid IFFT result.
136    ///     - `data[pos + truncated_size .. pos + size]`
137    ///       contains valid IFFT result if this contained
138    ///       only `0u8`:s and garbage otherwise.
139    fn ifft(
140        &self,
141        data: &mut ShardsRefMut,
142        pos: usize,
143        size: usize,
144        truncated_size: usize,
145        skew_delta: usize,
146    );
147
148    /// `x[] *= log_m`
149    fn mul(&self, x: &mut [[u8; 64]], log_m: GfElement);
150
151    // ============================================================
152    // PROVIDED
153
154    /// Evaluate polynomial.
155    fn eval_poly(erasures: &mut [GfElement; GF_ORDER], truncated_size: usize)
156    where
157        Self: Sized,
158    {
159        utils::eval_poly(erasures, truncated_size);
160    }
161}
162
163// ======================================================================
164// TESTS
165
166// Engines are tested indirectly via roundtrip tests of HighRate and LowRate.