use crate::{
    cfg_chunks_mut,
    cfg_into_iter,
    cfg_iter,
    cfg_iter_mut,
    fft::{DomainCoeff, SparsePolynomial},
};
use snarkvm_fields::{FftField, FftParameters, Field, batch_inversion};
#[cfg(not(feature = "serial"))]
use snarkvm_utilities::max_available_threads;
use snarkvm_utilities::{execute_with_max_available_threads, serialize::*};

use rand::Rng;
use std::{borrow::Cow, fmt};

use anyhow::{Result, ensure};

#[cfg(not(feature = "serial"))]
use rayon::prelude::*;

#[cfg(feature = "serial")]
use itertools::Itertools;

/// Returns the base-2 logarithm of `x`, rounded up (and `0` for `x == 0`).
pub fn log2(x: usize) -> u32 {
    if x == 0 {
        0
    } else if x.is_power_of_two() {
        1usize.leading_zeros() - x.leading_zeros()
    } else {
        0usize.leading_zeros() - x.leading_zeros()
    }
}

/// The minimum size of a chunk at which parallelization of chunks is beneficial.
#[allow(unused)]
#[cfg(not(feature = "serial"))]
const MIN_PARALLEL_CHUNK_SIZE: usize = 1 << 7;

/// Defines a multiplicative subgroup of a field over which one can perform radix-2 FFTs.
/// The size of the subgroup is always a power of two.
#[derive(Copy, Clone, Hash, Eq, PartialEq, CanonicalSerialize, CanonicalDeserialize)]
pub struct EvaluationDomain<F: FftField> {
    /// The size of the domain.
    pub size: u64,
    /// `log_2(self.size)`.
    pub log_size_of_group: u32,
    /// The size of the domain as a field element.
    pub size_as_field_element: F,
    /// The inverse of the size of the domain in the field.
    pub size_inv: F,
    /// A generator of the subgroup.
    pub group_gen: F,
    /// The inverse of the generator of the subgroup.
    pub group_gen_inv: F,
    /// The inverse of the multiplicative generator of the field.
    pub generator_inv: F,
}

impl<F: FftField> fmt::Debug for EvaluationDomain<F> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "Multiplicative subgroup of size {}", self.size)
    }
}

impl<F: FftField> EvaluationDomain<F> {
    /// Samples a random field element that is *not* in the domain.
    pub fn sample_element_outside_domain<R: Rng>(&self, rng: &mut R) -> F {
        let mut t = F::rand(rng);
        while self.evaluate_vanishing_polynomial(t).is_zero() {
            t = F::rand(rng);
        }
        t
    }

    /// Constructs a domain that is large enough for evaluations of a polynomial having
    /// `num_coeffs` coefficients, or returns `None` if the field's two-adicity is too small
    /// to support such a domain.
    pub fn new(num_coeffs: usize) -> Option<Self> {
        // Compute the size of the evaluation domain: the next power of two at or above `num_coeffs`.
        let size = num_coeffs.checked_next_power_of_two()? as u64;
        let log_size_of_group = size.trailing_zeros();

        if log_size_of_group > F::FftParameters::TWO_ADICITY {
            return None;
        }

        // Compute the generator of the multiplicative subgroup, i.e. a `size`-th root of unity.
        let group_gen = F::get_root_of_unity(size as usize)?;

        // Check that it is indeed a `size`-th root of unity.
        debug_assert_eq!(group_gen.pow([size]), F::one());

        let size_as_field_element = F::from(size);
        let size_inv = size_as_field_element.inverse()?;

        Some(EvaluationDomain {
            size,
            log_size_of_group,
            size_as_field_element,
            size_inv,
            group_gen,
            group_gen_inv: group_gen.inverse()?,
            generator_inv: F::multiplicative_generator().inverse()?,
        })
    }

    /// Returns the size of a domain that is large enough for evaluations of a polynomial
    /// having `num_coeffs` coefficients.
    pub fn compute_size_of_domain(num_coeffs: usize) -> Option<usize> {
        let size = num_coeffs.checked_next_power_of_two()?;
        if size.trailing_zeros() <= F::FftParameters::TWO_ADICITY { Some(size) } else { None }
    }

    /// Returns the size of the domain.
    pub fn size(&self) -> usize {
        self.size as usize
    }

    /// Computes an FFT.
    pub fn fft<T: DomainCoeff<F>>(&self, coeffs: &[T]) -> Vec<T> {
        let mut coeffs = coeffs.to_vec();
        self.fft_in_place(&mut coeffs);
        coeffs
    }

    /// Computes an FFT, modifying the vector in place.
    pub fn fft_in_place<T: DomainCoeff<F>>(&self, coeffs: &mut Vec<T>) {
        execute_with_max_available_threads(|| {
            coeffs.resize(self.size(), T::zero());
            self.in_order_fft_in_place(&mut *coeffs);
        });
    }

    /// Computes an IFFT.
    pub fn ifft<T: DomainCoeff<F>>(&self, evals: &[T]) -> Vec<T> {
        let mut evals = evals.to_vec();
        self.ifft_in_place(&mut evals);
        evals
    }

    /// Computes an IFFT, modifying the vector in place.
    #[inline]
    pub fn ifft_in_place<T: DomainCoeff<F>>(&self, evals: &mut Vec<T>) {
        execute_with_max_available_threads(|| {
            evals.resize(self.size(), T::zero());
            self.in_order_ifft_in_place(&mut *evals);
        });
    }

    /// Computes an FFT over a coset of the domain.
    pub fn coset_fft<T: DomainCoeff<F>>(&self, coeffs: &[T]) -> Vec<T> {
        let mut coeffs = coeffs.to_vec();
        self.coset_fft_in_place(&mut coeffs);
        coeffs
    }

    /// Computes an FFT over a coset of the domain, modifying the input vector in place.
    pub fn coset_fft_in_place<T: DomainCoeff<F>>(&self, coeffs: &mut Vec<T>) {
        execute_with_max_available_threads(|| {
            Self::distribute_powers(coeffs, F::multiplicative_generator());
            self.fft_in_place(coeffs);
        });
    }

    /// Computes an IFFT over a coset of the domain.
    pub fn coset_ifft<T: DomainCoeff<F>>(&self, evals: &[T]) -> Vec<T> {
        let mut evals = evals.to_vec();
        self.coset_ifft_in_place(&mut evals);
        evals
    }

    /// Computes an IFFT over a coset of the domain, modifying the input vector in place.
    pub fn coset_ifft_in_place<T: DomainCoeff<F>>(&self, evals: &mut Vec<T>) {
        execute_with_max_available_threads(|| {
            evals.resize(self.size(), T::zero());
            self.in_order_coset_ifft_in_place(&mut *evals);
        });
    }

    /// Multiplies the `i`-th element of `coeffs` with `g^i`.
    fn distribute_powers<T: DomainCoeff<F>>(coeffs: &mut [T], g: F) {
        Self::distribute_powers_and_mul_by_const(coeffs, g, F::one());
    }

    /// Multiplies the `i`-th element of `coeffs` with `c * g^i`.
    #[cfg(feature = "serial")]
    fn distribute_powers_and_mul_by_const<T: DomainCoeff<F>>(coeffs: &mut [T], g: F, c: F) {
        let mut pow = c;
        coeffs.iter_mut().for_each(|coeff| {
            *coeff *= pow;
            pow *= &g
        })
    }

    /// Multiplies the `i`-th element of `coeffs` with `c * g^i`.
    #[cfg(not(feature = "serial"))]
    fn distribute_powers_and_mul_by_const<T: DomainCoeff<F>>(coeffs: &mut [T], g: F, c: F) {
        let min_parallel_chunk_size = 1024;
        let num_cpus_available = max_available_threads();
        let num_elem_per_thread = core::cmp::max(coeffs.len() / num_cpus_available, min_parallel_chunk_size);

        cfg_chunks_mut!(coeffs, num_elem_per_thread).enumerate().for_each(|(i, chunk)| {
            let offset = c * g.pow([(i * num_elem_per_thread) as u64]);
            let mut pow = offset;
            chunk.iter_mut().for_each(|coeff| {
                *coeff *= pow;
                pow *= &g
            })
        });
    }

    /// Evaluates all Lagrange polynomials defined over this domain at the point `tau`,
    /// i.e. returns `L_i(tau) = g^i * (tau^n - 1) / (n * (tau - g^i))` for `i = 0..n`.
    pub fn evaluate_all_lagrange_coefficients(&self, tau: F) -> Vec<F> {
        let size = self.size as usize;
        let t_size = tau.pow([self.size]);
        let one = F::one();
        if t_size.is_one() {
            // `tau` lies in the domain itself, so exactly one coefficient is one and the rest are zero.
            let mut u = vec![F::zero(); size];
            let mut omega_i = one;
            for x in u.iter_mut().take(size) {
                if omega_i == tau {
                    *x = one;
                    break;
                }
                omega_i *= &self.group_gen;
            }
            u
        } else {
            let mut l = (t_size - one) * self.size_inv;
            let mut r = one;
            let mut u = vec![F::zero(); size];
            let mut ls = vec![F::zero(); size];
            for i in 0..size {
                u[i] = tau - r;
                ls[i] = l;
                l *= &self.group_gen;
                r *= &self.group_gen;
            }

            batch_inversion(u.as_mut_slice());
            cfg_iter_mut!(u).zip_eq(ls).for_each(|(tau_minus_r, l)| {
                *tau_minus_r = l * *tau_minus_r;
            });
            u
        }
    }

    /// Returns the sparse vanishing polynomial `Z_H(X) = X^n - 1` of this domain.
    pub fn vanishing_polynomial(&self) -> SparsePolynomial<F> {
        let coeffs = [(0, -F::one()), (self.size(), F::one())];
        SparsePolynomial::from_coefficients(coeffs)
    }

    /// Evaluates the vanishing polynomial of this domain at `tau`, i.e. computes `tau^n - 1`.
    pub fn evaluate_vanishing_polynomial(&self, tau: F) -> F {
        tau.pow([self.size]) - F::one()
    }

    /// Returns an iterator over the elements of the domain.
    pub fn elements(&self) -> Elements<F> {
        Elements { cur_elem: F::one(), cur_pow: 0, domain: *self }
    }

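    /// Divides, in place, the evaluations (taken over a coset `g * H` of this domain `H`) of
    /// some polynomial by the evaluations of this domain's vanishing polynomial on that coset.
    ///
    /// Added note (not in the original source): the vanishing polynomial `Z_H(X) = X^n - 1`
    /// is constant on the coset, since `(g * h)^n - 1 = g^n - 1` for every `h` in `H`, so a
    /// single inversion suffices.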
    pub fn divide_by_vanishing_poly_on_coset_in_place(&self, evals: &mut [F]) {
        let i = self.evaluate_vanishing_polynomial(F::multiplicative_generator()).inverse().unwrap();

        cfg_iter_mut!(evals).for_each(|eval| *eval *= &i);
    }

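    /// Given an index that assumes the first elements of this domain are the elements of
    /// another (smaller) subdomain, this returns the actual index into this domain.
    ///
    /// Illustrative example (added, not from the original source): with `self.size() == 8` and
    /// `other.size() == 2`, the period is 4, so the subdomain's indices `0, 1` map to `0, 4`,
    /// while the remaining indices `2..8` fill the gaps as `1, 2, 3, 5, 6, 7`.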
    pub fn reindex_by_subdomain(&self, other: &Self, index: usize) -> Result<usize> {
        ensure!(self.size() > other.size(), "other.size() must be smaller than self.size()");

        // Elements of the subdomain occupy every `period`-th position of this domain.
        let period = self.size() / other.size();
        if index < other.size() {
            // Indices into the subdomain map onto those evenly-spaced positions.
            Ok(index * period)
        } else {
            // The remaining indices are packed into the `period - 1` gaps between
            // consecutive subdomain elements.
            let i = index - other.size();
            let x = period - 1;
            Ok(i + (i / x) + 1)
        }
    }

    /// Performs O(n) multiplication of two polynomials that are presented by their
    /// evaluations over the domain.
    pub fn mul_polynomials_in_evaluation_domain(&self, self_evals: Vec<F>, other_evals: &[F]) -> Result<Vec<F>> {
        let mut result = self_evals;

        ensure!(result.len() == other_evals.len());
        cfg_iter_mut!(result).zip_eq(other_evals).for_each(|(a, b)| *a *= b);

        Ok(result)
    }
}

impl<F: FftField> EvaluationDomain<F> {
    /// Precomputes the roots of unity used by forward FFTs over this domain.
    pub fn precompute_fft(&self) -> FFTPrecomputation<F> {
        execute_with_max_available_threads(|| FFTPrecomputation {
            roots: self.roots_of_unity(self.group_gen),
            domain: *self,
        })
    }

    /// Precomputes the inverse roots of unity used by IFFTs over this domain.
    pub fn precompute_ifft(&self) -> IFFTPrecomputation<F> {
        execute_with_max_available_threads(|| IFFTPrecomputation {
            inverse_roots: self.roots_of_unity(self.group_gen_inv),
            domain: *self,
        })
    }

    pub(crate) fn in_order_fft_in_place<T: DomainCoeff<F>>(&self, x_s: &mut [T]) {
        #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
        if self.size >= 32 && std::mem::size_of::<T>() == 32 {
            // Try to offload the NTT to the GPU; fall back to the CPU implementation on failure.
            let result = snarkvm_algorithms_cuda::NTT(
                self.size as usize,
                x_s,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Forward,
                snarkvm_algorithms_cuda::NTTType::Standard,
            );
            if result.is_ok() {
                return;
            }
        }

        let pc = self.precompute_fft();
        self.fft_helper_in_place_with_pc(x_s, FFTOrder::II, &pc)
    }

    pub fn in_order_fft_with_pc<T: DomainCoeff<F>>(&self, x_s: &[T], pc: &FFTPrecomputation<F>) -> Vec<T> {
        let mut x_s = x_s.to_vec();
        if self.size() != x_s.len() {
            x_s.extend(core::iter::repeat(T::zero()).take(self.size() - x_s.len()));
        }
        self.fft_helper_in_place_with_pc(&mut x_s, FFTOrder::II, pc);
        x_s
    }

    pub(crate) fn in_order_ifft_in_place<T: DomainCoeff<F>>(&self, x_s: &mut [T]) {
        #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
        if self.size >= 32 && std::mem::size_of::<T>() == 32 {
            let result = snarkvm_algorithms_cuda::NTT(
                self.size as usize,
                x_s,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Inverse,
                snarkvm_algorithms_cuda::NTTType::Standard,
            );
            if result.is_ok() {
                return;
            }
        }

        let pc = self.precompute_ifft();
        self.ifft_helper_in_place_with_pc(x_s, FFTOrder::II, &pc);
        cfg_iter_mut!(x_s).for_each(|val| *val *= self.size_inv);
    }

    pub(crate) fn in_order_coset_ifft_in_place<T: DomainCoeff<F>>(&self, x_s: &mut [T]) {
        #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
        if self.size >= 32 && std::mem::size_of::<T>() == 32 {
            let result = snarkvm_algorithms_cuda::NTT(
                self.size as usize,
                x_s,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Inverse,
                snarkvm_algorithms_cuda::NTTType::Coset,
            );
            if result.is_ok() {
                return;
            }
        }

        let pc = self.precompute_ifft();
        self.ifft_helper_in_place_with_pc(x_s, FFTOrder::II, &pc);
        let coset_shift = self.generator_inv;
        Self::distribute_powers_and_mul_by_const(x_s, coset_shift, self.size_inv);
    }

    #[allow(unused)]
    pub(crate) fn in_order_fft_in_place_with_pc<T: DomainCoeff<F>>(
        &self,
        x_s: &mut [T],
        pre_comp: &FFTPrecomputation<F>,
    ) {
        #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
        if self.size >= 32 && std::mem::size_of::<T>() == 32 {
            let result = snarkvm_algorithms_cuda::NTT(
                self.size as usize,
                x_s,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Forward,
                snarkvm_algorithms_cuda::NTTType::Standard,
            );
            if result.is_ok() {
                return;
            }
        }

        self.fft_helper_in_place_with_pc(x_s, FFTOrder::II, pre_comp)
    }

    pub(crate) fn out_order_fft_in_place_with_pc<T: DomainCoeff<F>>(
        &self,
        x_s: &mut [T],
        pre_comp: &FFTPrecomputation<F>,
    ) {
        self.fft_helper_in_place_with_pc(x_s, FFTOrder::IO, pre_comp)
    }

    pub(crate) fn in_order_ifft_in_place_with_pc<T: DomainCoeff<F>>(
        &self,
        x_s: &mut [T],
        pre_comp: &IFFTPrecomputation<F>,
    ) {
        #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
        if self.size >= 32 && std::mem::size_of::<T>() == 32 {
            let result = snarkvm_algorithms_cuda::NTT(
                self.size as usize,
                x_s,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Inverse,
                snarkvm_algorithms_cuda::NTTType::Standard,
            );
            if result.is_ok() {
                return;
            }
        }

        self.ifft_helper_in_place_with_pc(x_s, FFTOrder::II, pre_comp);
        cfg_iter_mut!(x_s).for_each(|val| *val *= self.size_inv);
    }

    pub(crate) fn out_order_ifft_in_place_with_pc<T: DomainCoeff<F>>(
        &self,
        x_s: &mut [T],
        pre_comp: &IFFTPrecomputation<F>,
    ) {
        self.ifft_helper_in_place_with_pc(x_s, FFTOrder::OI, pre_comp);
        cfg_iter_mut!(x_s).for_each(|val| *val *= self.size_inv);
    }

    #[allow(unused)]
    pub(crate) fn in_order_coset_ifft_in_place_with_pc<T: DomainCoeff<F>>(
        &self,
        x_s: &mut [T],
        pre_comp: &IFFTPrecomputation<F>,
    ) {
        #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
        if self.size >= 32 && std::mem::size_of::<T>() == 32 {
            let result = snarkvm_algorithms_cuda::NTT(
                self.size as usize,
                x_s,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Inverse,
                snarkvm_algorithms_cuda::NTTType::Coset,
            );
            if result.is_ok() {
                return;
            }
        }

        self.ifft_helper_in_place_with_pc(x_s, FFTOrder::II, pre_comp);
        let coset_shift = self.generator_inv;
        Self::distribute_powers_and_mul_by_const(x_s, coset_shift, self.size_inv);
    }

    fn fft_helper_in_place_with_pc<T: DomainCoeff<F>>(
        &self,
        x_s: &mut [T],
        ord: FFTOrder,
        pre_comp: &FFTPrecomputation<F>,
    ) {
        use FFTOrder::*;
        let pc = pre_comp.precomputation_for_subdomain(self).unwrap();

        let log_len = log2(x_s.len());

        if ord == OI {
            self.oi_helper_with_roots(x_s, &pc.roots);
        } else {
            self.io_helper_with_roots(x_s, &pc.roots);
        }

        if ord == II {
            derange_helper(x_s, log_len);
        }
    }

    fn ifft_helper_in_place_with_pc<T: DomainCoeff<F>>(
        &self,
        x_s: &mut [T],
        ord: FFTOrder,
        pre_comp: &IFFTPrecomputation<F>,
    ) {
        use FFTOrder::*;
        let pc = pre_comp.precomputation_for_subdomain(self).unwrap();

        let log_len = log2(x_s.len());

        if ord == II {
            derange_helper(x_s, log_len);
        }

        if ord == IO {
            self.io_helper_with_roots(x_s, &pc.inverse_roots);
        } else {
            self.oi_helper_with_roots(x_s, &pc.inverse_roots);
        }
    }

    /// Computes the first `self.size / 2` roots of unity for the entire domain.
    #[cfg(feature = "serial")]
    pub fn roots_of_unity(&self, root: F) -> Vec<F> {
        compute_powers_serial((self.size as usize) / 2, root)
    }

    /// Computes the first `self.size / 2` roots of unity for the entire domain.
    #[cfg(not(feature = "serial"))]
    pub fn roots_of_unity(&self, root: F) -> Vec<F> {
        let log_size = log2(self.size as usize);
        // For small domains, compute the powers serially; for large ones, recurse in parallel.
        if log_size <= LOG_ROOTS_OF_UNITY_PARALLEL_SIZE {
            compute_powers_serial((self.size as usize) / 2, root)
        } else {
            let mut temp = root;
            // Compute root, root^2, root^4, ..., root^(2^(log_size - 2)).
            let log_powers: Vec<F> = (0..(log_size - 1))
                .map(|_| {
                    let old_value = temp;
                    temp.square_in_place();
                    old_value
                })
                .collect();

            // Allocate the return array and start the recursion.
            let mut powers = vec![F::zero(); 1 << (log_size - 1)];
            Self::roots_of_unity_recursive(&mut powers, &log_powers);
            powers
        }
    }

    #[cfg(not(feature = "serial"))]
    fn roots_of_unity_recursive(out: &mut [F], log_powers: &[F]) {
        assert_eq!(out.len(), 1 << log_powers.len());
        // Base case: compute the powers sequentially.
        if log_powers.len() <= LOG_ROOTS_OF_UNITY_PARALLEL_SIZE as usize {
            out[0] = F::one();
            for idx in 1..out.len() {
                out[idx] = out[idx - 1] * log_powers[0];
            }
            return;
        }

        // Recursive case: split the powers in half and compute both halves in parallel.
        let (lr_lo, lr_hi) = log_powers.split_at((1 + log_powers.len()) / 2);
        let mut scr_lo = vec![F::default(); 1 << lr_lo.len()];
        let mut scr_hi = vec![F::default(); 1 << lr_hi.len()];
        rayon::join(
            || Self::roots_of_unity_recursive(&mut scr_lo, lr_lo),
            || Self::roots_of_unity_recursive(&mut scr_hi, lr_hi),
        );
        // Combine the halves: each output chunk is one "high" power times all "low" powers.
        out.par_chunks_mut(scr_lo.len()).zip(&scr_hi).for_each(|(out_chunk, scr_hi)| {
            for (out_elem, scr_lo) in out_chunk.iter_mut().zip(&scr_lo) {
                *out_elem = *scr_hi * scr_lo;
            }
        });
    }

    #[inline(always)]
    fn butterfly_fn_io<T: DomainCoeff<F>>(((lo, hi), root): ((&mut T, &mut T), &F)) {
        let neg = *lo - *hi;
        *lo += *hi;
        *hi = neg;
        *hi *= *root;
    }

    #[inline(always)]
    fn butterfly_fn_oi<T: DomainCoeff<F>>(((lo, hi), root): ((&mut T, &mut T), &F)) {
        *hi *= *root;
        let neg = *lo - *hi;
        *lo += *hi;
        *hi = neg;
    }

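    // Added commentary (not in the original source): `butterfly_fn_io` is the
    // decimation-in-frequency butterfly used by `io_helper_with_roots` (natural-order input,
    // bit-reversed output), while `butterfly_fn_oi` is the decimation-in-time butterfly used
    // by `oi_helper_with_roots` (bit-reversed input, natural-order output).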
    #[allow(clippy::too_many_arguments)]
    fn apply_butterfly<T: DomainCoeff<F>, G: Fn(((&mut T, &mut T), &F)) + Copy + Sync + Send>(
        g: G,
        xi: &mut [T],
        roots: &[F],
        step: usize,
        chunk_size: usize,
        num_chunks: usize,
        max_threads: usize,
        gap: usize,
    ) {
        cfg_chunks_mut!(xi, chunk_size).for_each(|cxi| {
            let (lo, hi) = cxi.split_at_mut(gap);
            // If the chunks are large enough, and chunk-level parallelism does not already
            // saturate the available threads, parallelize the butterflies within a chunk.
            if gap > MIN_GAP_SIZE_FOR_PARALLELISATION && num_chunks < max_threads {
                cfg_iter_mut!(lo).zip(hi).zip(cfg_iter!(roots).step_by(step)).for_each(g);
            } else {
                lo.iter_mut().zip(hi).zip(roots.iter().step_by(step)).for_each(g);
            }
        });
    }

    #[allow(clippy::unnecessary_to_owned)]
    fn io_helper_with_roots<T: DomainCoeff<F>>(&self, xi: &mut [T], roots: &[F]) {
        let mut roots = std::borrow::Cow::Borrowed(roots);

        let mut step = 1;
        let mut first = true;

        #[cfg(not(feature = "serial"))]
        let max_threads = snarkvm_utilities::parallel::max_available_threads();
        #[cfg(feature = "serial")]
        let max_threads = 1;

        let mut gap = xi.len() / 2;
        while gap > 0 {
            // Each butterfly cluster uses 2 * gap positions.
            let chunk_size = 2 * gap;
            let num_chunks = xi.len() / chunk_size;

            // Once there are enough chunks, compact the roots still in use into a contiguous
            // vector so that later rounds stride through memory more efficiently.
            if num_chunks >= MIN_NUM_CHUNKS_FOR_COMPACTION {
                if !first {
                    roots = Cow::Owned(cfg_into_iter!(roots.into_owned()).step_by(step * 2).collect());
                }
                step = 1;
                roots.to_mut().shrink_to_fit();
            } else {
                step = num_chunks;
            }
            first = false;

            Self::apply_butterfly(
                Self::butterfly_fn_io,
                xi,
                &roots[..],
                step,
                chunk_size,
                num_chunks,
                max_threads,
                gap,
            );

            gap /= 2;
        }
    }

    fn oi_helper_with_roots<T: DomainCoeff<F>>(&self, xi: &mut [T], roots_cache: &[F]) {
        // Scratch buffer for compacted roots. Compaction only happens while
        // `num_chunks >= MIN_NUM_CHUNKS_FOR_COMPACTION`, so at most
        // `roots_cache.len() / MIN_NUM_CHUNKS_FOR_COMPACTION` (and never more than half the
        // cache) entries are needed.
        let compaction_max_size =
            core::cmp::min(roots_cache.len() / 2, roots_cache.len() / MIN_NUM_CHUNKS_FOR_COMPACTION);
        let mut compacted_roots = vec![F::default(); compaction_max_size];

        #[cfg(not(feature = "serial"))]
        let max_threads = snarkvm_utilities::parallel::max_available_threads();
        #[cfg(feature = "serial")]
        let max_threads = 1;

        let mut gap = 1;
        while gap < xi.len() {
            // Each butterfly cluster uses 2 * gap positions.
            let chunk_size = 2 * gap;
            let num_chunks = xi.len() / chunk_size;

            // Only compact the roots into the scratch buffer if they are not already
            // contiguous (i.e. if the stride through the cache would be greater than 1).
            let (roots, step) = if num_chunks >= MIN_NUM_CHUNKS_FOR_COMPACTION && gap < xi.len() / 2 {
                cfg_iter_mut!(compacted_roots[..gap])
                    .zip(cfg_iter!(roots_cache[..(gap * num_chunks)]).step_by(num_chunks))
                    .for_each(|(a, b)| *a = *b);
                (&compacted_roots[..gap], 1)
            } else {
                (roots_cache, num_chunks)
            };

            Self::apply_butterfly(Self::butterfly_fn_oi, xi, roots, step, chunk_size, num_chunks, max_threads, gap);

            gap *= 2;
        }
    }
}

/// The minimum number of chunks at which root compaction is beneficial.
const MIN_NUM_CHUNKS_FOR_COMPACTION: usize = 1 << 7;

/// The minimum size of a chunk at which parallelization of butterflies is beneficial.
const MIN_GAP_SIZE_FOR_PARALLELISATION: usize = 1 << 10;

/// The log of the domain size above which the roots of unity are computed via the
/// parallel recursive algorithm rather than serially.
#[cfg(not(feature = "serial"))]
const LOG_ROOTS_OF_UNITY_PARALLEL_SIZE: u32 = 7;

/// Reverses the lowest `log_len` bits of `a`.
#[inline]
pub(super) fn bitrev(a: u64, log_len: u32) -> u64 {
    a.reverse_bits() >> (64 - log_len)
}

/// Reorders `xi` into bit-reversed order.
pub(crate) fn derange<T>(xi: &mut [T]) {
    derange_helper(xi, log2(xi.len()))
}

fn derange_helper<T>(xi: &mut [T], log_len: u32) {
    for idx in 1..(xi.len() as u64 - 1) {
        let ridx = bitrev(idx, log_len);
        if idx < ridx {
            xi.swap(idx as usize, ridx as usize);
        }
    }
}

/// The ordering of the inputs and outputs of an (I)FFT.
#[derive(PartialEq, Eq, Debug)]
enum FFTOrder {
    /// Both the input and the output of the (I)FFT must be in-order.
    II,
    /// The input of the (I)FFT must be in-order, but the output does not have to be.
    IO,
    /// The input of the (I)FFT can be out of order, but the output must be in-order.
    OI,
}

pub(crate) fn compute_powers_serial<F: Field>(size: usize, root: F) -> Vec<F> {
    compute_powers_and_mul_by_const_serial(size, root, F::one())
}

pub(crate) fn compute_powers_and_mul_by_const_serial<F: Field>(size: usize, root: F, c: F) -> Vec<F> {
    let mut value = c;
    (0..size)
        .map(|_| {
            let old_value = value;
            value *= root;
            old_value
        })
        .collect()
}

#[allow(unused)]
#[cfg(not(feature = "serial"))]
pub(crate) fn compute_powers<F: Field>(size: usize, g: F) -> Vec<F> {
    if size < MIN_PARALLEL_CHUNK_SIZE {
        return compute_powers_serial(size, g);
    }
    // Compute the number of threads to use.
    let num_cpus_available = max_available_threads();
    let num_elem_per_thread = core::cmp::max(size / num_cpus_available, MIN_PARALLEL_CHUNK_SIZE);
    let num_cpus_used = size / num_elem_per_thread;

    // Split the powers to compute evenly across the threads.
    let res: Vec<F> = (0..num_cpus_used)
        .into_par_iter()
        .flat_map(|i| {
            let offset = g.pow([(i * num_elem_per_thread) as u64]);
            // Compute the size that this chunk should be.
            let num_elements_to_compute = core::cmp::min(size - i * num_elem_per_thread, num_elem_per_thread);
            compute_powers_and_mul_by_const_serial(num_elements_to_compute, g, offset)
        })
        .collect();
    res
}

/// An iterator over the elements of a domain.
#[derive(Clone)]
pub struct Elements<F: FftField> {
    cur_elem: F,
    cur_pow: u64,
    domain: EvaluationDomain<F>,
}

impl<F: FftField> Iterator for Elements<F> {
    type Item = F;

    fn next(&mut self) -> Option<F> {
        if self.cur_pow == self.domain.size {
            None
        } else {
            let cur_elem = self.cur_elem;
            self.cur_elem *= &self.domain.group_gen;
            self.cur_pow += 1;
            Some(cur_elem)
        }
    }
}

/// An object storing precomputed roots of unity to speed up (forward) FFT computations.
#[derive(Clone, Eq, PartialEq, Debug, CanonicalDeserialize, CanonicalSerialize)]
pub struct FFTPrecomputation<F: FftField> {
    roots: Vec<F>,
    domain: EvaluationDomain<F>,
}

impl<F: FftField> FFTPrecomputation<F> {
    /// Converts this precomputation into one usable for IFFTs, by batch-inverting the roots.
    pub fn to_ifft_precomputation(&self) -> IFFTPrecomputation<F> {
        let mut inverse_roots = self.roots.clone();
        snarkvm_fields::batch_inversion(&mut inverse_roots);
        IFFTPrecomputation { inverse_roots, domain: self.domain }
    }

    /// Returns this precomputation restricted to `domain`, if `domain` is a subdomain of the
    /// domain this precomputation was generated for.
    pub fn precomputation_for_subdomain<'a>(&'a self, domain: &EvaluationDomain<F>) -> Option<Cow<'a, Self>> {
        if domain.size() == 1 {
            return Some(Cow::Owned(Self { roots: vec![], domain: *domain }));
        }
        if &self.domain == domain {
            Some(Cow::Borrowed(self))
        } else if domain.size() < self.domain.size() {
            let size_ratio = self.domain.size() / domain.size();
            let roots = self.roots.iter().step_by(size_ratio).copied().collect();
            Some(Cow::Owned(Self { roots, domain: *domain }))
        } else {
            None
        }
    }
}

/// An object storing precomputed inverse roots of unity to speed up IFFT computations.
#[derive(Clone, Eq, PartialEq, Debug, CanonicalSerialize, CanonicalDeserialize)]
pub struct IFFTPrecomputation<F: FftField> {
    inverse_roots: Vec<F>,
    domain: EvaluationDomain<F>,
}

impl<F: FftField> IFFTPrecomputation<F> {
    /// Returns this precomputation restricted to `domain`, if `domain` is a subdomain of the
    /// domain this precomputation was generated for.
    pub fn precomputation_for_subdomain<'a>(&'a self, domain: &EvaluationDomain<F>) -> Option<Cow<'a, Self>> {
        if domain.size() == 1 {
            return Some(Cow::Owned(Self { inverse_roots: vec![], domain: *domain }));
        }
        if &self.domain == domain {
            Some(Cow::Borrowed(self))
        } else if domain.size() < self.domain.size() {
            let size_ratio = self.domain.size() / domain.size();
            let inverse_roots = self.inverse_roots.iter().step_by(size_ratio).copied().collect();
            Some(Cow::Owned(Self { inverse_roots, domain: *domain }))
        } else {
            None
        }
    }
}

#[cfg(test)]
mod tests {
    #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
    use crate::fft::domain::FFTOrder;
    use crate::fft::{DensePolynomial, EvaluationDomain};
    use rand::Rng;
    use snarkvm_curves::bls12_377::Fr;
    use snarkvm_fields::{FftField, Field, One, Zero};
    use snarkvm_utilities::{TestRng, Uniform};

    #[test]
    fn vanishing_polynomial_evaluation() {
        let rng = &mut TestRng::default();
        for coeffs in 0..10 {
            let domain = EvaluationDomain::<Fr>::new(coeffs).unwrap();
            let z = domain.vanishing_polynomial();
            for _ in 0..100 {
                let point = rng.gen();
                assert_eq!(z.evaluate(point), domain.evaluate_vanishing_polynomial(point))
            }
        }
    }

    #[test]
    fn vanishing_polynomial_vanishes_on_domain() {
        for coeffs in 0..1000 {
            let domain = EvaluationDomain::<Fr>::new(coeffs).unwrap();
            let z = domain.vanishing_polynomial();
            for point in domain.elements() {
                assert!(z.evaluate(point).is_zero())
            }
        }
    }

    #[test]
    fn size_of_elements() {
        for coeffs in 1..10 {
            let size = 1 << coeffs;
            let domain = EvaluationDomain::<Fr>::new(size).unwrap();
            let domain_size = domain.size();
            assert_eq!(domain_size, domain.elements().count());
        }
    }

    #[test]
    fn elements_contents() {
        for coeffs in 1..10 {
            let size = 1 << coeffs;
            let domain = EvaluationDomain::<Fr>::new(size).unwrap();
            for (i, element) in domain.elements().enumerate() {
                assert_eq!(element, domain.group_gen.pow([i as u64]));
            }
        }
    }

    /// Tests that Lagrange interpolation at a random point agrees with direct evaluation.
    #[test]
    fn non_systematic_lagrange_coefficients_test() {
        let mut rng = TestRng::default();
        for domain_dimension in 1..10 {
            let domain_size = 1 << domain_dimension;
            let domain = EvaluationDomain::<Fr>::new(domain_size).unwrap();
            let random_point = Fr::rand(&mut rng);
            let lagrange_coefficients = domain.evaluate_all_lagrange_coefficients(random_point);

            // Sample a random polynomial, then evaluate it over the domain and at the random point.
            let random_polynomial = DensePolynomial::<Fr>::rand(domain_size - 1, &mut rng);
            let polynomial_evaluations = domain.fft(random_polynomial.coeffs());
            let actual_evaluations = random_polynomial.evaluate(random_point);

            // Do a naive interpolation at the query point and compare.
            let mut interpolated_evaluation = Fr::zero();
            for i in 0..domain_size {
                interpolated_evaluation += lagrange_coefficients[i] * polynomial_evaluations[i];
            }
            assert_eq!(actual_evaluations, interpolated_evaluation);
        }
    }
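
    // An additional sanity check (not part of the original test suite): pointwise
    // multiplication of evaluations via `mul_polynomials_in_evaluation_domain` should agree
    // with ordinary polynomial multiplication, assuming the domain is large enough to hold
    // the product. The degree below is an arbitrary choice for illustration.
    #[test]
    fn mul_polynomials_in_evaluation_domain_test() {
        let mut rng = TestRng::default();
        let degree = 7;
        let a = DensePolynomial::<Fr>::rand(degree, &mut rng);
        let b = DensePolynomial::<Fr>::rand(degree, &mut rng);

        // The product has degree `2 * degree`, so it needs `2 * degree + 1` coefficients.
        let domain = EvaluationDomain::<Fr>::new(2 * degree + 1).unwrap();
        let a_evals = domain.fft(&a.coeffs);
        let b_evals = domain.fft(&b.coeffs);
        let product_evals = domain.mul_polynomials_in_evaluation_domain(a_evals, &b_evals).unwrap();
        let product = DensePolynomial::from_coefficients_vec(domain.ifft(&product_evals));

        // Check the product at a random point.
        let point = Fr::rand(&mut rng);
        assert_eq!(product.evaluate(point), a.evaluate(point) * b.evaluate(point));
    }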

    /// Tests that the Lagrange coefficients evaluated at a domain element form the
    /// corresponding standard basis vector.
    #[test]
    fn systematic_lagrange_coefficients_test() {
        // This runs in time O(N^2) in the domain size, so keep the domain dimension small.
        for domain_dimension in 1..5 {
            let domain_size = 1 << domain_dimension;
            let domain = EvaluationDomain::<Fr>::new(domain_size).unwrap();
            let all_domain_elements: Vec<Fr> = domain.elements().collect();
            for (i, domain_element) in all_domain_elements.iter().enumerate().take(domain_size) {
                let lagrange_coefficients = domain.evaluate_all_lagrange_coefficients(*domain_element);
                for (j, lagrange_coefficient) in lagrange_coefficients.iter().enumerate().take(domain_size) {
                    // The coefficient at the evaluation point should be 1, and 0 everywhere else.
                    if i == j {
                        assert_eq!(*lagrange_coefficient, Fr::one());
                    } else {
                        assert_eq!(*lagrange_coefficient, Fr::zero());
                    }
                }
            }
        }
    }

    /// Tests that the roots of unity returned by `roots_of_unity` agree with the domain elements.
    #[test]
    fn test_roots_of_unity() {
        let max_degree = 10;
        for log_domain_size in 0..max_degree {
            let domain_size = 1 << log_domain_size;
            let domain = EvaluationDomain::<Fr>::new(domain_size).unwrap();
            let actual_roots = domain.roots_of_unity(domain.group_gen);
            for &value in &actual_roots {
                assert!(domain.evaluate_vanishing_polynomial(value).is_zero());
            }
            let expected_roots_elements = domain.elements();
            for (expected, &actual) in expected_roots_elements.zip(&actual_roots) {
                assert_eq!(expected, actual);
            }
            assert_eq!(actual_roots.len(), domain_size / 2);
        }
    }

    /// Tests that FFT and coset FFT agree with direct evaluation, and that the corresponding
    /// IFFTs recover the original polynomial.
    #[test]
    fn test_fft_correctness() {
        let mut rng = TestRng::default();

        // Runs FFT/IFFT and coset FFT/IFFT for domains of size `degree` and `2 * degree`.
        let log_degree = 5;
        let degree = 1 << log_degree;
        let random_polynomial = DensePolynomial::<Fr>::rand(degree - 1, &mut rng);

        for log_domain_size in log_degree..(log_degree + 2) {
            let domain_size = 1 << log_domain_size;
            let domain = EvaluationDomain::<Fr>::new(domain_size).unwrap();
            let polynomial_evaluations = domain.fft(&random_polynomial.coeffs);
            let polynomial_coset_evaluations = domain.coset_fft(&random_polynomial.coeffs);
            for (i, x) in domain.elements().enumerate() {
                let coset_x = Fr::multiplicative_generator() * x;

                assert_eq!(polynomial_evaluations[i], random_polynomial.evaluate(x));
                assert_eq!(polynomial_coset_evaluations[i], random_polynomial.evaluate(coset_x));
            }

            let random_polynomial_from_subgroup =
                DensePolynomial::from_coefficients_vec(domain.ifft(&polynomial_evaluations));
            let random_polynomial_from_coset =
                DensePolynomial::from_coefficients_vec(domain.coset_ifft(&polynomial_coset_evaluations));

            assert_eq!(
                random_polynomial, random_polynomial_from_subgroup,
                "degree = {degree}, domain size = {domain_size}"
            );
            assert_eq!(
                random_polynomial, random_polynomial_from_coset,
                "degree = {degree}, domain size = {domain_size}"
            );
        }
    }

    #[test]
    fn test_fft_precomputation() {
        for i in 1..10 {
            let big_domain = EvaluationDomain::<Fr>::new(i).unwrap();
            let pc = big_domain.precompute_fft();
            for j in 1..i {
                let small_domain = EvaluationDomain::<Fr>::new(j).unwrap();
                let small_pc = small_domain.precompute_fft();
                assert_eq!(pc.precomputation_for_subdomain(&small_domain).unwrap().as_ref(), &small_pc);
            }
        }
    }

    #[test]
    fn test_ifft_precomputation() {
        for i in 1..10 {
            let big_domain = EvaluationDomain::<Fr>::new(i).unwrap();
            let pc = big_domain.precompute_ifft();
            for j in 1..i {
                let small_domain = EvaluationDomain::<Fr>::new(j).unwrap();
                let small_pc = small_domain.precompute_ifft();
                assert_eq!(pc.precomputation_for_subdomain(&small_domain).unwrap().as_ref(), &small_pc);
            }
        }
    }

    /// Tests that an `IFFTPrecomputation` derived from an `FFTPrecomputation` matches the one
    /// computed directly.
    #[test]
    fn test_ifft_precomputation_from_fft() {
        for i in 1..10 {
            let domain = EvaluationDomain::<Fr>::new(i).unwrap();
            let pc = domain.precompute_ifft();
            let fft_pc = domain.precompute_fft();
            assert_eq!(pc, fft_pc.to_ifft_precomputation())
        }
    }
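
    // An additional sanity check (not part of the original test suite): `reindex_by_subdomain`
    // should map the subdomain's indices onto the evenly-spaced positions of the larger domain
    // and map every index to a distinct position. The domain sizes below are arbitrary.
    #[test]
    fn test_reindex_by_subdomain() {
        let large = EvaluationDomain::<Fr>::new(16).unwrap();
        let small = EvaluationDomain::<Fr>::new(4).unwrap();
        let period = large.size() / small.size();

        // Subdomain indices land on multiples of the period.
        for index in 0..small.size() {
            assert_eq!(large.reindex_by_subdomain(&small, index).unwrap(), index * period);
        }

        // Over the whole index range, the mapping stays in bounds and never collides.
        let mut seen = std::collections::HashSet::new();
        for index in 0..large.size() {
            let reindexed = large.reindex_by_subdomain(&small, index).unwrap();
            assert!(reindexed < large.size());
            assert!(seen.insert(reindexed));
        }
    }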

    #[cfg(all(feature = "cuda", target_arch = "x86_64"))]
    #[test]
    fn test_fft_correctness_cuda() {
        let mut rng = TestRng::default();
        for log_domain in 2..20 {
            println!("Testing domain size {log_domain}");
            let domain_size = 1 << log_domain;
            let random_polynomial = DensePolynomial::<Fr>::rand(domain_size - 1, &mut rng);
            let mut polynomial_evaluations = random_polynomial.coeffs.clone();
            let mut polynomial_evaluations_cuda = random_polynomial.coeffs.clone();

            let domain = EvaluationDomain::<Fr>::new(domain_size).unwrap();
            let pc = domain.precompute_fft();
            domain.fft_helper_in_place_with_pc(&mut polynomial_evaluations, FFTOrder::II, &pc);

            if snarkvm_algorithms_cuda::NTT::<Fr>(
                domain_size,
                &mut polynomial_evaluations_cuda,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Forward,
                snarkvm_algorithms_cuda::NTTType::Standard,
            )
            .is_err()
            {
                println!("cuda error!");
            }

            assert_eq!(polynomial_evaluations, polynomial_evaluations_cuda, "domain size = {domain_size}");

            if snarkvm_algorithms_cuda::NTT::<Fr>(
                domain_size,
                &mut polynomial_evaluations_cuda,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Inverse,
                snarkvm_algorithms_cuda::NTTType::Standard,
            )
            .is_err()
            {
                println!("cuda error!");
            }
            assert_eq!(random_polynomial.coeffs, polynomial_evaluations_cuda, "domain size = {domain_size}");

            polynomial_evaluations = random_polynomial.coeffs.clone();
            let domain = EvaluationDomain::<Fr>::new(domain_size).unwrap();
            let pc = domain.precompute_fft();
            EvaluationDomain::<Fr>::distribute_powers(&mut polynomial_evaluations, Fr::multiplicative_generator());
            domain.fft_helper_in_place_with_pc(&mut polynomial_evaluations, FFTOrder::II, &pc);

            if snarkvm_algorithms_cuda::NTT::<Fr>(
                domain_size,
                &mut polynomial_evaluations_cuda,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Forward,
                snarkvm_algorithms_cuda::NTTType::Coset,
            )
            .is_err()
            {
                println!("cuda error!");
            }

            assert_eq!(polynomial_evaluations, polynomial_evaluations_cuda, "domain size = {domain_size}");

            if snarkvm_algorithms_cuda::NTT::<Fr>(
                domain_size,
                &mut polynomial_evaluations_cuda,
                snarkvm_algorithms_cuda::NTTInputOutputOrder::NN,
                snarkvm_algorithms_cuda::NTTDirection::Inverse,
                snarkvm_algorithms_cuda::NTTType::Coset,
            )
            .is_err()
            {
                println!("cuda error!");
            }
            assert_eq!(random_polynomial.coeffs, polynomial_evaluations_cuda, "domain size = {domain_size}");
        }
    }
}