use crate::num::arithmetic::traits::{
    ModMul, ModMulAssign, ModMulPrecomputed, ModMulPrecomputedAssign, Parity, PowerOf2,
    WrappingSubAssign,
};
use crate::num::basic::integers::{PrimitiveInt, USIZE_IS_U32};
use crate::num::basic::unsigneds::PrimitiveUnsigned;
use crate::num::conversion::traits::{ExactFrom, HasHalf, JoinHalves, SplitInHalf, WrappingFrom};
use crate::num::logic::traits::LeadingZeros;

pub_test! {naive_mod_mul<T: PrimitiveUnsigned>(x: T, y: T, m: T) -> T {
    assert!(x < m, "x must be reduced mod m, but {x} >= {m}");
    assert!(y < m, "y must be reduced mod m, but {y} >= {m}");
    let (product_1, product_0) = T::x_mul_y_to_zz(x, y);
    T::xx_div_mod_y_to_qr(product_1, product_0, m).1
}}
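
// Illustrative sketch, not part of the original source: a cross-check of `naive_mod_mul`
// against plain double-width arithmetic, guarded by the crate's existing `test_build`
// feature. The function name is ours. For example, 7 * 8 mod 10 == 6.
#[cfg(feature = "test_build")]
pub fn check_naive_mod_mul_u32(x: u32, y: u32, m: u32) {
    // Assumes x < m and y < m, as `naive_mod_mul` itself asserts.
    let expected = u32::exact_from(u64::from(x) * u64::from(y) % u64::from(m));
    assert_eq!(naive_mod_mul(x, y, m), expected);
}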

const INVERT_U32_TABLE_LOG_SIZE: u64 = 9;

const INVERT_U32_TABLE_SIZE: usize = 1 << INVERT_U32_TABLE_LOG_SIZE;

const INVERT_U32_TABLE: [u32; INVERT_U32_TABLE_SIZE] = [
    32737, 32673, 32609, 32546, 32483, 32420, 32357, 32295, 32233, 32171, 32109, 32048, 31987,
    31926, 31865, 31805, 31744, 31684, 31625, 31565, 31506, 31447, 31388, 31329, 31271, 31212,
    31154, 31097, 31039, 30982, 30924, 30868, 30811, 30754, 30698, 30642, 30586, 30530, 30475,
    30419, 30364, 30309, 30255, 30200, 30146, 30092, 30038, 29984, 29930, 29877, 29824, 29771,
    29718, 29666, 29613, 29561, 29509, 29457, 29405, 29354, 29303, 29251, 29200, 29150, 29099,
    29049, 28998, 28948, 28898, 28849, 28799, 28750, 28700, 28651, 28602, 28554, 28505, 28457,
    28409, 28360, 28313, 28265, 28217, 28170, 28123, 28075, 28029, 27982, 27935, 27889, 27842,
    27796, 27750, 27704, 27658, 27613, 27568, 27522, 27477, 27432, 27387, 27343, 27298, 27254,
    27209, 27165, 27121, 27078, 27034, 26990, 26947, 26904, 26861, 26818, 26775, 26732, 26690,
    26647, 26605, 26563, 26521, 26479, 26437, 26395, 26354, 26312, 26271, 26230, 26189, 26148,
    26108, 26067, 26026, 25986, 25946, 25906, 25866, 25826, 25786, 25747, 25707, 25668, 25628,
    25589, 25550, 25511, 25473, 25434, 25395, 25357, 25319, 25281, 25242, 25205, 25167, 25129,
    25091, 25054, 25016, 24979, 24942, 24905, 24868, 24831, 24794, 24758, 24721, 24685, 24649,
    24612, 24576, 24540, 24504, 24469, 24433, 24397, 24362, 24327, 24291, 24256, 24221, 24186,
    24151, 24117, 24082, 24047, 24013, 23979, 23944, 23910, 23876, 23842, 23808, 23774, 23741,
    23707, 23674, 23640, 23607, 23574, 23541, 23508, 23475, 23442, 23409, 23377, 23344, 23312,
    23279, 23247, 23215, 23183, 23151, 23119, 23087, 23055, 23023, 22992, 22960, 22929, 22898,
    22866, 22835, 22804, 22773, 22742, 22711, 22681, 22650, 22619, 22589, 22559, 22528, 22498,
    22468, 22438, 22408, 22378, 22348, 22318, 22289, 22259, 22229, 22200, 22171, 22141, 22112,
    22083, 22054, 22025, 21996, 21967, 21938, 21910, 21881, 21853, 21824, 21796, 21767, 21739,
    21711, 21683, 21655, 21627, 21599, 21571, 21544, 21516, 21488, 21461, 21433, 21406, 21379,
    21352, 21324, 21297, 21270, 21243, 21216, 21190, 21163, 21136, 21110, 21083, 21056, 21030,
    21004, 20977, 20951, 20925, 20899, 20873, 20847, 20821, 20795, 20769, 20744, 20718, 20693,
    20667, 20642, 20616, 20591, 20566, 20540, 20515, 20490, 20465, 20440, 20415, 20390, 20366,
    20341, 20316, 20292, 20267, 20243, 20218, 20194, 20170, 20145, 20121, 20097, 20073, 20049,
    20025, 20001, 19977, 19953, 19930, 19906, 19882, 19859, 19835, 19812, 19789, 19765, 19742,
    19719, 19696, 19672, 19649, 19626, 19603, 19581, 19558, 19535, 19512, 19489, 19467, 19444,
    19422, 19399, 19377, 19354, 19332, 19310, 19288, 19265, 19243, 19221, 19199, 19177, 19155,
    19133, 19112, 19090, 19068, 19046, 19025, 19003, 18982, 18960, 18939, 18917, 18896, 18875,
    18854, 18832, 18811, 18790, 18769, 18748, 18727, 18706, 18686, 18665, 18644, 18623, 18603,
    18582, 18561, 18541, 18520, 18500, 18479, 18459, 18439, 18419, 18398, 18378, 18358, 18338,
    18318, 18298, 18278, 18258, 18238, 18218, 18199, 18179, 18159, 18139, 18120, 18100, 18081,
    18061, 18042, 18022, 18003, 17984, 17964, 17945, 17926, 17907, 17888, 17869, 17850, 17831,
    17812, 17793, 17774, 17755, 17736, 17718, 17699, 17680, 17662, 17643, 17624, 17606, 17587,
    17569, 17551, 17532, 17514, 17496, 17477, 17459, 17441, 17423, 17405, 17387, 17369, 17351,
    17333, 17315, 17297, 17279, 17261, 17244, 17226, 17208, 17191, 17173, 17155, 17138, 17120,
    17103, 17085, 17068, 17051, 17033, 17016, 16999, 16982, 16964, 16947, 16930, 16913, 16896,
    16879, 16862, 16845, 16828, 16811, 16794, 16778, 16761, 16744, 16727, 16711, 16694, 16677,
    16661, 16644, 16628, 16611, 16595, 16578, 16562, 16546, 16529, 16513, 16497, 16481, 16464,
    16448, 16432, 16416, 16400, 16384,
];

#[cfg(feature = "test_build")]
pub fn test_invert_u32_table() {
    for (i, &x) in INVERT_U32_TABLE.iter().enumerate() {
        let value = (u32::power_of_2(24) - u32::power_of_2(14) + u32::power_of_2(9))
            / (u32::power_of_2(9) + u32::exact_from(i));
        assert_eq!(
            x, value,
            "INVERT_U32_TABLE gives incorrect value, {x}, for index {i}"
        );
    }
}
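
// Worked examples of the formula checked above (added commentary, not in the original
// source): entry 0 is (2^24 - 2^14 + 2^9) / 2^9 = 16761344 / 512 = 32737, and entry 511
// is 16761344 / (2^9 + 511) = 16761344 / 1023 = 16384, matching the first and last
// values in the table.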

pub_crate_test! {limbs_invert_limb_u32(x: u32) -> u32 {
    assert!(x.get_highest_bit());
    let a = INVERT_U32_TABLE[usize::exact_from(x << 1 >> 23)];
    let b = (a << 4)
        .wrapping_sub((u64::from(a * a) * u64::from((x >> 11) + 1)).upper_half())
        .wrapping_sub(1);
    let mut c = b.wrapping_mul(x >> 1).wrapping_neg();
    if x.odd() {
        c.wrapping_sub_assign(b.wrapping_sub(b >> 1));
    }
    let d = (b << 15).wrapping_add((u64::from(b) * u64::from(c)).upper_half() >> 1);
    d.wrapping_sub(
        (u64::from(d) * u64::from(x))
            .wrapping_add(u64::from(x))
            .upper_half()
            .wrapping_add(x),
    )
}}
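
// Illustrative sketch, not part of the original source: for a normalized divisor `x`
// (highest bit set), the value computed above is expected to be the usual precomputed
// reciprocal floor((2^64 - 1) / x) - 2^32. The check is guarded by the crate's existing
// `test_build` feature; the function name is ours.
#[cfg(feature = "test_build")]
pub fn check_invert_limb_u32_definition(x: u32) {
    assert!(x.get_highest_bit());
    let expected = u32::exact_from(u64::MAX / u64::from(x) - (1u64 << 32));
    assert_eq!(limbs_invert_limb_u32(x), expected);
}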

const INVERT_U64_TABLE_LOG_SIZE: u64 = 8;

const INVERT_U64_TABLE_SIZE: usize = 1 << INVERT_U64_TABLE_LOG_SIZE;

const INVERT_U64_TABLE: [u64; INVERT_U64_TABLE_SIZE] = [
    2045, 2037, 2029, 2021, 2013, 2005, 1998, 1990, 1983, 1975, 1968, 1960, 1953, 1946, 1938, 1931,
    1924, 1917, 1910, 1903, 1896, 1889, 1883, 1876, 1869, 1863, 1856, 1849, 1843, 1836, 1830, 1824,
    1817, 1811, 1805, 1799, 1792, 1786, 1780, 1774, 1768, 1762, 1756, 1750, 1745, 1739, 1733, 1727,
    1722, 1716, 1710, 1705, 1699, 1694, 1688, 1683, 1677, 1672, 1667, 1661, 1656, 1651, 1646, 1641,
    1636, 1630, 1625, 1620, 1615, 1610, 1605, 1600, 1596, 1591, 1586, 1581, 1576, 1572, 1567, 1562,
    1558, 1553, 1548, 1544, 1539, 1535, 1530, 1526, 1521, 1517, 1513, 1508, 1504, 1500, 1495, 1491,
    1487, 1483, 1478, 1474, 1470, 1466, 1462, 1458, 1454, 1450, 1446, 1442, 1438, 1434, 1430, 1426,
    1422, 1418, 1414, 1411, 1407, 1403, 1399, 1396, 1392, 1388, 1384, 1381, 1377, 1374, 1370, 1366,
    1363, 1359, 1356, 1352, 1349, 1345, 1342, 1338, 1335, 1332, 1328, 1325, 1322, 1318, 1315, 1312,
    1308, 1305, 1302, 1299, 1295, 1292, 1289, 1286, 1283, 1280, 1276, 1273, 1270, 1267, 1264, 1261,
    1258, 1255, 1252, 1249, 1246, 1243, 1240, 1237, 1234, 1231, 1228, 1226, 1223, 1220, 1217, 1214,
    1211, 1209, 1206, 1203, 1200, 1197, 1195, 1192, 1189, 1187, 1184, 1181, 1179, 1176, 1173, 1171,
    1168, 1165, 1163, 1160, 1158, 1155, 1153, 1150, 1148, 1145, 1143, 1140, 1138, 1135, 1133, 1130,
    1128, 1125, 1123, 1121, 1118, 1116, 1113, 1111, 1109, 1106, 1104, 1102, 1099, 1097, 1095, 1092,
    1090, 1088, 1086, 1083, 1081, 1079, 1077, 1074, 1072, 1070, 1068, 1066, 1064, 1061, 1059, 1057,
    1055, 1053, 1051, 1049, 1047, 1044, 1042, 1040, 1038, 1036, 1034, 1032, 1030, 1028, 1026, 1024,
];

#[cfg(feature = "test_build")]
pub fn test_invert_u64_table() {
    for (i, &x) in INVERT_U64_TABLE.iter().enumerate() {
        let value = (u64::power_of_2(19) - 3 * u64::power_of_2(8))
            / (u64::power_of_2(8) + u64::exact_from(i));
        assert_eq!(
            x, value,
            "INVERT_U64_TABLE gives incorrect value, {x}, for index {i}"
        );
    }
}
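
// Worked examples of the formula checked above (added commentary, not in the original
// source): entry 0 is (2^19 - 3 * 2^8) / 2^8 = 523520 / 256 = 2045, and entry 255 is
// 523520 / (2^8 + 255) = 523520 / 511 = 1024, matching the first and last table values.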

pub_crate_test! {limbs_invert_limb_u64(x: u64) -> u64 {
    assert!(x.get_highest_bit());
    let a = (x >> 24) + 1;
    let b = INVERT_U64_TABLE[usize::exact_from(x << 1 >> 56)];
    let c = (b << 11).wrapping_sub(((b * b).wrapping_mul(a) >> 40) + 1);
    let d = (c.wrapping_mul(u64::power_of_2(60).wrapping_sub(c.wrapping_mul(a))) >> 47)
        .wrapping_add(c << 13);
    let mut e = d.wrapping_mul(x >> 1).wrapping_neg();
    if x.odd() {
        e.wrapping_sub_assign(d.wrapping_sub(d >> 1));
    }
    let f = (d << 31).wrapping_add((u128::from(d) * u128::from(e)).upper_half() >> 1);
    f.wrapping_sub(
        (u128::from(f) * u128::from(x))
            .wrapping_add(u128::from(x))
            .upper_half()
            .wrapping_add(x),
    )
}}
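
// Illustrative sketch, not part of the original source: analogous to the 32-bit case,
// for a normalized `x` the result above is expected to equal
// floor((2^128 - 1) / x) - 2^64. Guarded by the crate's existing `test_build` feature;
// the function name is ours.
#[cfg(feature = "test_build")]
pub fn check_invert_limb_u64_definition(x: u64) {
    assert!(x.get_highest_bit());
    let expected = u64::exact_from(u128::MAX / u128::from(x) - (1u128 << 64));
    assert_eq!(limbs_invert_limb_u64(x), expected);
}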

pub_test! {mod_preinverted_double<
    T: PrimitiveUnsigned,
    DT: From<T> + HasHalf<Half = T> + JoinHalves + PrimitiveUnsigned + SplitInHalf,
>(
    mut x_1: T,
    x_0: T,
    d: T,
    d_inv: T,
) -> T {
    assert_ne!(d, T::ZERO);
    let d_inv = DT::from(d_inv);
    let shift = LeadingZeros::leading_zeros(d);
    if shift == 0 {
        if x_1 >= d {
            x_1 -= d;
        }
        let (q_1, q_0) = (d_inv * DT::from(x_1))
            .wrapping_add(DT::join_halves(x_1, x_0))
            .split_in_half();
        let mut r = x_0.wrapping_sub(q_1.wrapping_add(T::ONE).wrapping_mul(d));
        if r > q_0 {
            r.wrapping_add_assign(d);
        }
        if r < d {
            r
        } else {
            r - d
        }
    } else {
        let mut d = d;
        if x_1 >= d {
            let y_1 = x_1 >> (T::WIDTH - shift);
            let y_0 = x_1 << shift;
            d <<= shift;
            let (q1, q0) = (d_inv * DT::from(y_1))
                .wrapping_add(DT::join_halves(y_1, y_0))
                .split_in_half();
            x_1 = y_0.wrapping_sub(q1.wrapping_add(T::ONE).wrapping_mul(d));
            if x_1 > q0 {
                x_1.wrapping_add_assign(d);
            }
            if x_1 >= d {
                x_1 -= d;
            }
        } else {
            d <<= shift;
            x_1 <<= shift;
        }
        let y_1 = x_1.wrapping_add(x_0 >> (T::WIDTH - shift));
        let y_0 = x_0 << shift;
        let (q_1, q_0) = (d_inv * DT::from(y_1))
            .wrapping_add(DT::join_halves(y_1, y_0))
            .split_in_half();
        let mut r = y_0.wrapping_sub(q_1.wrapping_add(T::ONE).wrapping_mul(d));
        if r > q_0 {
            r.wrapping_add_assign(d);
        }
        if r < d {
            r >> shift
        } else {
            (r - d) >> shift
        }
    }
}}
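
// Illustrative sketch, not part of the original source: `mod_preinverted_double` reduces
// the double-width value x_1 * 2^W + x_0 modulo `d`, given the precomputed inverse of the
// normalized divisor. The check below compares it with a direct double-width remainder,
// guarded by the crate's existing `test_build` feature; the function name is ours.
#[cfg(feature = "test_build")]
pub fn check_mod_preinverted_double_u32(x_1: u32, x_0: u32, d: u32) {
    // Assumes d != 0. The inverse is derived from the normalized divisor, in the same way
    // `precompute_mod_mul_data` does below.
    let d_inv = limbs_invert_limb_u32(d << LeadingZeros::leading_zeros(d));
    let expected = u32::exact_from(u64::join_halves(x_1, x_0) % u64::from(d));
    assert_eq!(mod_preinverted_double::<u32, u64>(x_1, x_0, d, d_inv), expected);
}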

pub_test! {fast_mod_mul<
    T: PrimitiveUnsigned,
    DT: From<T> + HasHalf<Half = T> + JoinHalves + PrimitiveUnsigned + SplitInHalf,
>(
    x: T,
    y: T,
    m: T,
    inv: T,
) -> T {
    assert!(x < m, "x must be reduced mod m, but {x} >= {m}");
    assert!(y < m, "y must be reduced mod m, but {y} >= {m}");
    let (product_1, product_0) = (DT::from(x) * DT::from(y)).split_in_half();
    mod_preinverted_double::<T, DT>(product_1, product_0, m, inv)
}}
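
// Illustrative sketch, not part of the original source: `fast_mod_mul` should agree with
// `naive_mod_mul` once the inverse of the normalized modulus has been precomputed.
// Guarded by the crate's existing `test_build` feature; the function name is ours.
#[cfg(feature = "test_build")]
pub fn check_fast_mod_mul_u64(x: u64, y: u64, m: u64) {
    // Assumes x < m and y < m (so m != 0), as both multiplication routines assert.
    let inv = limbs_invert_limb_u64(m << LeadingZeros::leading_zeros(m));
    assert_eq!(fast_mod_mul::<u64, u128>(x, y, m, inv), naive_mod_mul(x, y, m));
}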

macro_rules! impl_mod_mul_precomputed_fast {
    ($t:ident, $dt:ident, $invert_limb:ident) => {
        impl ModMulPrecomputed<$t, $t> for $t {
            type Output = $t;
            type Data = $t;

            fn precompute_mod_mul_data(&m: &$t) -> $t {
                $invert_limb(m << LeadingZeros::leading_zeros(m))
            }

            fn mod_mul_precomputed(self, other: $t, m: $t, data: &$t) -> $t {
                fast_mod_mul::<$t, $dt>(self, other, m, *data)
            }
        }
    };
}
impl_mod_mul_precomputed_fast!(u32, u64, limbs_invert_limb_u32);
impl_mod_mul_precomputed_fast!(u64, u128, limbs_invert_limb_u64);
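
// Illustrative usage sketch, not part of the original source: one precomputed inverse can
// be reused for many multiplications modulo the same `m`. Guarded by the crate's existing
// `test_build` feature; the function name is ours.
#[cfg(feature = "test_build")]
pub fn demo_mod_mul_precomputed_reuse(x: u32, y: u32, z: u32, m: u32) -> u32 {
    // Assumes x, y, and z are already reduced mod m. Computes x * y * z mod m.
    let data = u32::precompute_mod_mul_data(&m);
    x.mod_mul_precomputed(y, m, &data).mod_mul_precomputed(z, m, &data)
}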

macro_rules! impl_mod_mul_precomputed_promoted {
    ($t:ident) => {
        impl ModMulPrecomputed<$t, $t> for $t {
            type Output = $t;
            type Data = u32;

            fn precompute_mod_mul_data(&m: &$t) -> u32 {
                u32::precompute_mod_mul_data(&u32::from(m))
            }

            fn mod_mul_precomputed(self, other: $t, m: $t, data: &u32) -> $t {
                $t::wrapping_from(u32::from(self).mod_mul_precomputed(
                    u32::from(other),
                    u32::from(m),
                    data,
                ))
            }
        }
    };
}
impl_mod_mul_precomputed_promoted!(u8);
impl_mod_mul_precomputed_promoted!(u16);
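
// Illustrative sketch, not part of the original source: u8 and u16 delegate to the u32
// implementation, so the precomputed data is a u32 inverse. Guarded by the crate's
// existing `test_build` feature; the function name is ours.
#[cfg(feature = "test_build")]
pub fn demo_mod_mul_precomputed_u8(x: u8, y: u8, m: u8) -> u8 {
    // Assumes x < m and y < m. For example, 200 * 3 mod 251 == 98.
    let data = u8::precompute_mod_mul_data(&m);
    x.mod_mul_precomputed(y, m, &data)
}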

impl ModMulPrecomputed<u128, u128> for u128 {
    type Output = u128;
    type Data = ();

    fn precompute_mod_mul_data(_m: &u128) {}

    #[inline]
    fn mod_mul_precomputed(self, other: u128, m: u128, _data: &()) -> u128 {
        naive_mod_mul(self, other, m)
    }
}

impl ModMulPrecomputed<usize, usize> for usize {
    type Output = usize;
    type Data = usize;

    fn precompute_mod_mul_data(&m: &usize) -> usize {
        if USIZE_IS_U32 {
            usize::wrapping_from(u32::precompute_mod_mul_data(&u32::wrapping_from(m)))
        } else {
            usize::wrapping_from(u64::precompute_mod_mul_data(&u64::wrapping_from(m)))
        }
    }

    fn mod_mul_precomputed(self, other: usize, m: usize, data: &usize) -> usize {
        if USIZE_IS_U32 {
            usize::wrapping_from(u32::wrapping_from(self).mod_mul_precomputed(
                u32::wrapping_from(other),
                u32::wrapping_from(m),
                &u32::wrapping_from(*data),
            ))
        } else {
            usize::wrapping_from(u64::wrapping_from(self).mod_mul_precomputed(
                u64::wrapping_from(other),
                u64::wrapping_from(m),
                &u64::wrapping_from(*data),
            ))
        }
    }
}

macro_rules! impl_mod_mul {
    ($t:ident) => {
        impl ModMulPrecomputedAssign<$t, $t> for $t {
            #[inline]
            fn mod_mul_precomputed_assign(&mut self, other: $t, m: $t, data: &Self::Data) {
                *self = self.mod_mul_precomputed(other, m, data);
            }
        }

        impl ModMul<$t> for $t {
            type Output = $t;

            #[inline]
            fn mod_mul(self, other: $t, m: $t) -> $t {
                naive_mod_mul(self, other, m)
            }
        }

        impl ModMulAssign<$t> for $t {
            #[inline]
            fn mod_mul_assign(&mut self, other: $t, m: $t) {
                *self = naive_mod_mul(*self, other, m);
            }
        }
    };
}
apply_to_unsigneds!(impl_mod_mul);
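
// Illustrative sketch, not part of the original source: `ModMul` and `ModMulAssign` are
// expected to agree for reduced inputs. Guarded by the crate's existing `test_build`
// feature; the function name is ours.
#[cfg(feature = "test_build")]
pub fn check_mod_mul_matches_assign(x: u16, y: u16, m: u16) {
    // Assumes x < m and y < m. For example, 7 * 8 mod 10 == 6.
    let mut z = x;
    z.mod_mul_assign(y, m);
    assert_eq!(z, x.mod_mul(y, m));
}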