reed_solomon_16/
engine.rs

1//! Low-level building blocks for Reed-Solomon encoding/decoding.
2//!
3//! **This is an advanced module which is not needed for [simple usage] or [basic usage].**
4//!
5//! This module is relevant if you want to
6//! - use [`rate`] module and need an [`Engine`] to use with it.
7//! - create your own [`Engine`].
8//! - understand/benchmark/test at low level.
9//!
10//! # Engines
11//!
12//! An [`Engine`] is an implementation of basic low-level algorithms
13//! needed for Reed-Solomon encoding/decoding.
14//!
15//! - [`Naive`]
16//!     - Simple reference implementation.
17//! - [`NoSimd`]
18//!     - Basic optimized engine without SIMD so that it works on all CPUs.
19//! - [`DefaultEngine`]
20//!     - Default engine which is used when no specific engine is given.
21//!     - Currently just alias to [`NoSimd`].
22//!
23//! # Benchmarks
24//!
25//! - These benchmarks are from `cargo bench engine`
26//!   with 3.4 GHz i5-3570K (Ivy Bridge, 3rd gen.).
27//! - Shards are 1024 bytes.
28//!
29//! | Benchmark         | Shards  | ns [`Naive`] | ns [`NoSimd`] |
30//! | ----------------- | ------- | ------------ | ------------- |
31//! | xor               | 1 * 2   | 60           | 32            |
32//! | mul               | 1       | 1 260        | 860           |
33//! | xor_within        | 128 * 2 | 5 870        | 5 780         |
34//! | formal_derivative | 128     | 21 300       | 15 800        |
35//! | FFT               | 128     | 764 000      | 545 000       |
36//! | IFFT              | 128     | 780 000      | 546 000       |
37//! | FWHT              | -       | 898 000      | 622 000       |
38//!
39//! [simple usage]: crate#simple-usage
40//! [basic usage]: crate#basic-usage
41//! [`ReedSolomonEncoder`]: crate::ReedSolomonEncoder
42//! [`ReedSolomonDecoder`]: crate::ReedSolomonDecoder
43//! [`rate`]: crate::rate
44
45pub(crate) use self::shards::Shards;
46
47pub use self::{engine_naive::Naive, engine_nosimd::NoSimd, shards::ShardsRefMut};
48
49mod engine_naive;
50mod engine_nosimd;
51mod shards;
52
53pub mod tables;
54
55// ======================================================================
56// CONST - PUBLIC
57
58/// Size of Galois field element [`GfElement`] in bits.
59pub const GF_BITS: usize = 16;
60
61/// Galois field order, i.e. number of elements.
62pub const GF_ORDER: usize = 65536;
63
64/// `GF_ORDER - 1`
65pub const GF_MODULUS: GfElement = 65535;
66
67/// Galois field polynomial.
68pub const GF_POLYNOMIAL: usize = 0x1002D;
69
70/// TODO
71pub const CANTOR_BASIS: [GfElement; GF_BITS] = [
72    0x0001, 0xACCA, 0x3C0E, 0x163E, 0xC582, 0xED2E, 0x914C, 0x4012, 0x6C98, 0x10D8, 0x6A72, 0xB900,
73    0xFDB8, 0xFB34, 0xFF38, 0x991E,
74];
75
76// ======================================================================
77// TYPE ALIASES - PUBLIC
78
79/// Galois field element.
80pub type GfElement = u16;
81
82/// Default [`Engine`], currently just alias to [`NoSimd`].
83pub type DefaultEngine = NoSimd;
84
85// ======================================================================
86// FUNCTIONS - PUBLIC - Galois field operations
87
88/// Some kind of addition.
89#[inline(always)]
90pub fn add_mod(x: GfElement, y: GfElement) -> GfElement {
91    let sum: usize = (x as usize) + (y as usize);
92    (sum + (sum >> GF_BITS)) as GfElement
93}
94
95/// Some kind of subtraction.
96#[inline(always)]
97pub fn sub_mod(x: GfElement, y: GfElement) -> GfElement {
98    let dif: usize = (x as usize).wrapping_sub(y as usize);
99    dif.wrapping_add(dif >> GF_BITS) as GfElement
100}
101
102// ======================================================================
103// FUNCTIONS - PUBLIC - misc
104
105/// Returns smallest value that is greater than or equal to `a` and multiple of `b`,
106/// or `None` if `b` is zero or operation would overflow.
107///
108/// - This function is available as [`usize::checked_next_multiple_of`] in nightly Rust.
109///
110/// # Examples
111///
112/// ```rust
113/// use reed_solomon_16::engine;
114///
115/// assert_eq!(engine::checked_next_multiple_of(20, 10), Some(20));
116/// assert_eq!(engine::checked_next_multiple_of(27, 10), Some(30));
117/// ```
118///
119/// [`usize::checked_next_multiple_of`]: https://doc.rust-lang.org/std/primitive.usize.html#method.checked_next_multiple_of
120pub fn checked_next_multiple_of(a: usize, b: usize) -> Option<usize> {
121    if b == 0 {
122        None
123    } else {
124        let mut x = a / b;
125        x += if a % b != 0 { 1 } else { 0 };
126        x.checked_mul(b)
127    }
128}
129
130// ======================================================================
131// Engine - PUBLIC
132
133/// Implementation of basic low-level algorithms needed
134/// for Reed-Solomon encoding/decoding.
135///
136/// These algorithms are not properly documented.
137///
138/// [`Naive`] engine is provided for those who want to
139/// study the source code to understand [`Engine`].
140pub trait Engine: Clone
141where
142    Self: Sized,
143{
144    // ============================================================
145    // REQUIRED
146
147    /// In-place decimation-in-time FFT (fast Fourier transform).
148    ///
149    /// - FFT is done on chunk `data[pos .. pos + size]`
150    /// - `size` must be `2^n`
151    /// - Before function call `data[pos .. pos + size]` must be valid.
152    /// - After function call
153    ///     - `data[pos .. pos + truncated_size]`
154    ///       contains valid FFT result.
155    ///     - `data[pos + truncated_size .. pos + size]`
156    ///       contains valid FFT result if this contained
157    ///       only `0u8`:s and garbage otherwise.
158    fn fft(
159        &self,
160        data: &mut ShardsRefMut,
161        pos: usize,
162        size: usize,
163        truncated_size: usize,
164        skew_delta: usize,
165    );
166
167    /// In-place FWHT (fast Walsh-Hadamard transform).
168    ///
169    /// - This is used only in [`Engine::eval_poly`],
170    ///   both directly and indirectly via [`initialize_log_walsh`].
171    /// - `truncated_size` must be handled so that
172    ///   [`Engine::eval_poly`] returns correct result.
173    ///
174    /// [`initialize_log_walsh`]: self::tables::initialize_log_walsh
175    fn fwht(data: &mut [GfElement; GF_ORDER], truncated_size: usize);
176
177    /// In-place decimation-in-time IFFT (inverse fast Fourier transform).
178    ///
179    /// - IFFT is done on chunk `data[pos .. pos + size]`
180    /// - `size` must be `2^n`
181    /// - Before function call `data[pos .. pos + size]` must be valid.
182    /// - After function call
183    ///     - `data[pos .. pos + truncated_size]`
184    ///       contains valid IFFT result.
185    ///     - `data[pos + truncated_size .. pos + size]`
186    ///       contains valid IFFT result if this contained
187    ///       only `0u8`:s and garbage otherwise.
188    fn ifft(
189        &self,
190        data: &mut ShardsRefMut,
191        pos: usize,
192        size: usize,
193        truncated_size: usize,
194        skew_delta: usize,
195    );
196
197    /// `x[] *= log_m`
198    fn mul(&self, x: &mut [u8], log_m: GfElement);
199
200    /// `x[] ^= y[]`
201    fn xor(x: &mut [u8], y: &[u8]);
202
203    // ============================================================
204    // PROVIDED
205
206    /// Evaluate polynomial.
207    fn eval_poly(erasures: &mut [GfElement; GF_ORDER], truncated_size: usize) {
208        let log_walsh = tables::initialize_log_walsh::<Self>();
209
210        Self::fwht(erasures, truncated_size);
211
212        for i in 0..GF_ORDER {
213            erasures[i] = (((erasures[i] as usize) * (log_walsh[i] as usize))
214                % (GF_MODULUS as usize)) as GfElement;
215        }
216
217        Self::fwht(erasures, GF_ORDER);
218    }
219
220    /// FFT with `skew_delta = pos + size`.
221    #[inline(always)]
222    fn fft_skew_end(
223        &self,
224        data: &mut ShardsRefMut,
225        pos: usize,
226        size: usize,
227        truncated_size: usize,
228    ) {
229        self.fft(data, pos, size, truncated_size, pos + size)
230    }
231
232    /// Formal derivative.
233    fn formal_derivative(data: &mut ShardsRefMut) {
234        for i in 1..data.len() {
235            let width: usize = ((i ^ (i - 1)) + 1) >> 1;
236            Self::xor_within(data, i - width, i, width);
237        }
238    }
239
240    /// IFFT with `skew_delta = pos + size`.
241    #[inline(always)]
242    fn ifft_skew_end(
243        &self,
244        data: &mut ShardsRefMut,
245        pos: usize,
246        size: usize,
247        truncated_size: usize,
248    ) {
249        self.ifft(data, pos, size, truncated_size, pos + size)
250    }
251
252    /// `data[x .. x + count] ^= data[y .. y + count]`
253    ///
254    /// Ranges must not overlap.
255    #[inline(always)]
256    fn xor_within(data: &mut ShardsRefMut, x: usize, y: usize, count: usize) {
257        let (xs, ys) = data.flat2_mut(x, y, count);
258        Self::xor(xs, ys);
259    }
260}
261
262// ======================================================================
263// TESTS
264
265// Engines are tested indirectly via roundtrip tests of HighRate and LowRate.
266
267#[cfg(test)]
268mod tests {
269    use super::*;
270
271    // ============================================================
272    // checked_next_multiple_of
273
274    #[test]
275    fn test_checked_next_multiple_of() {
276        assert_eq!(checked_next_multiple_of(10, 0), None);
277        assert_eq!(checked_next_multiple_of(usize::MAX, 2), None);
278
279        assert_eq!(checked_next_multiple_of(99, 20), Some(100));
280        assert_eq!(checked_next_multiple_of(100, 20), Some(100));
281        assert_eq!(checked_next_multiple_of(101, 20), Some(120));
282    }
283}