lexical_write_float/
algorithm.rs

1//! Implementation of the Dragonbox algorithm.
2//!
3//! This is modified from the Rust port of Dragonbox, available
4//! [here](https://github.com/dtolnay/dragonbox). It also uses a direct
5//! port of Dragonbox, available [here](https://github.com/jk-jeon/dragonbox/).
6//!
7//! This is therefore under an Apache 2.0/Boost Software dual-license.
8//!
9//! We use a u64 for the significant digits, even for a 32-bit integer,
10//! however, we use the proper bit shifts, etc. for the float in question,
11//! rather than clobbering the result to f64, as Rust's port does.
12//!
13//! Each one of the algorithms described here has the main implementation,
14//! according to the reference Dragonbox paper, as well as an alias for
15//! our own purposes. The existing algorithms include:
16//!
17//! 1. `compute_nearest_normal`
18//! 2. `compute_nearest_shorter`
19//! 3. `compute_left_closed_directed`
20//! 4. `compute_right_closed_directed`
21//!
22//! `compute_nearest_normal` and `compute_nearest_shorter` are used for
23//! round-nearest, tie-even and `compute_right_closed_directed` is used
24//! for round-to-zero (see below for details).
25
26#![cfg(not(feature = "compact"))]
27#![doc(hidden)]
28
29#[cfg(feature = "f16")]
30use lexical_util::bf16::bf16;
31#[cfg(feature = "f16")]
32use lexical_util::f16::f16;
33use lexical_util::format::NumberFormat;
34use lexical_util::num::{AsPrimitive, Float};
35use lexical_write_integer::decimal::{Decimal, DecimalCount};
36
37use crate::float::{ExtendedFloat80, RawFloat};
38use crate::options::{Options, RoundMode};
39use crate::shared;
40use crate::table::*;
41
42/// Optimized float-to-string algorithm for decimal strings.
43#[inline(always)]
44pub fn write_float<F: RawFloat, const FORMAT: u128>(
45    float: F,
46    bytes: &mut [u8],
47    options: &Options,
48) -> usize {
49    debug_assert!(!float.is_special());
50    debug_assert!(float >= F::ZERO);
51
52    let fp = to_decimal(float);
53    let digit_count = F::digit_count(fp.mant);
54    let sci_exp = fp.exp + digit_count as i32 - 1;
55
56    // Note that for performance reasons, we write the significant digits
57    // later into the algorithms, since we can determine the right path
58    // and write the significant digits without using an intermediate buffer
59    // in most cases.
60    write_float!(
61        float,
62        FORMAT,
63        sci_exp,
64        options,
65        write_float_scientific,
66        write_float_positive_exponent,
67        write_float_negative_exponent,
68        generic => F,
69        bytes => bytes,
70        args => fp, sci_exp, options,
71    )
72}
73
74/// Write float to string in scientific notation.
75#[inline]
76pub fn write_float_scientific<F: DragonboxFloat, const FORMAT: u128>(
77    bytes: &mut [u8],
78    fp: ExtendedFloat80,
79    sci_exp: i32,
80    options: &Options,
81) -> usize {
82    // Config options.
83    debug_assert_eq!(count_factors(10, fp.mant), 0);
84    let format = NumberFormat::<{ FORMAT }> {};
85    assert!(format.is_valid());
86    let decimal_point = options.decimal_point();
87
88    // Write the significant digits. Write at index 1, so we can shift 1
89    // for the decimal point without intermediate buffers.
90    // Won't panic if we have enough bytes to write the significant digits.
91    let digits = &mut bytes[1..];
92    let digit_count = F::write_digits(digits, fp.mant);
93
94    // Truncate and round the significant digits.
95    let (digit_count, carried) = shared::truncate_and_round_decimal(digits, digit_count, options);
96    let sci_exp = sci_exp + carried as i32;
97
98    // Determine the exact number of digits to write.
99    let exact_count = shared::min_exact_digits(digit_count, options);
100
101    // Write any trailing digits.
102    let mut cursor: usize;
103    bytes[0] = bytes[1];
104    bytes[1] = decimal_point;
105    if !format.no_exponent_without_fraction() && digit_count == 1 && options.trim_floats() {
106        cursor = 1;
107    } else if digit_count < exact_count {
108        // Adjust the number of digits written, by appending zeros.
109        cursor = digit_count + 1;
110        let zeros = exact_count - digit_count;
111        bytes[cursor..cursor + zeros].fill(b'0');
112        cursor += zeros;
113    } else if digit_count == 1 {
114        bytes[2] = b'0';
115        cursor = 3;
116    } else {
117        cursor = digit_count + 1;
118    }
119
120    // Now, write our scientific notation.
121    // Won't panic since bytes must be large enough to store all digits.
122    shared::write_exponent::<FORMAT>(bytes, &mut cursor, sci_exp, options.exponent());
123
124    cursor
125}
126
127/// Write negative float to string without scientific notation.
128///
129/// Has a negative exponent (shift right) and no scientific notation.
130#[inline]
131pub fn write_float_negative_exponent<F: DragonboxFloat, const FORMAT: u128>(
132    bytes: &mut [u8],
133    fp: ExtendedFloat80,
134    sci_exp: i32,
135    options: &Options,
136) -> usize {
137    debug_assert!(sci_exp < 0);
138    debug_assert_eq!(count_factors(10, fp.mant), 0);
139
140    // Config options.
141    let decimal_point = options.decimal_point();
142    let sci_exp = sci_exp.wrapping_neg() as usize;
143
144    // Write our 0 digits.
145    let mut cursor = sci_exp + 1;
146    debug_assert!(cursor >= 2, "must have a buffer >= 2 to write our 0 digits");
147    // We write 0 digits even over the decimal point, since we might have
148    // to round carry over. This is rare, but it could happen, and would
149    // require a shift after. The good news is: if we have a shift, we
150    // only need to move 1 digit.
151    bytes[..cursor].fill(b'0');
152
153    // Write out our significant digits.
154    // Won't panic: we have enough bytes to write the significant digits.
155    let digits = &mut bytes[cursor..];
156    let digit_count = F::write_digits(digits, fp.mant);
157
158    // Truncate and round the significant digits.
159    debug_assert!(cursor > 0, "underflowed our digits");
160    let (digit_count, carried) = shared::truncate_and_round_decimal(digits, digit_count, options);
161
162    // Handle any trailing digits.
163    let mut trimmed = false;
164    if carried && cursor == 2 {
165        // Rounded-up, and carried to the first byte, so instead of having
166        // 0.9999, we have 1.0.
167        bytes[0] = b'1';
168        if options.trim_floats() {
169            cursor = 1;
170            trimmed = true;
171        } else {
172            bytes[1] = decimal_point;
173            bytes[2] = b'0';
174            cursor = 3;
175        }
176    } else if carried {
177        // Carried, so we need to remove 1 zero before our digits.
178        bytes[1] = decimal_point;
179        bytes[cursor - 1] = bytes[cursor];
180    } else {
181        bytes[1] = decimal_point;
182        cursor += digit_count;
183    }
184
185    // Determine the exact number of digits to write.
186    let exact_count = shared::min_exact_digits(digit_count, options);
187
188    // Write any trailing digits.
189    // Cursor is 1 if we trimmed floats, in which case skip this.
190    if !trimmed && digit_count < exact_count {
191        let zeros = exact_count - digit_count;
192        bytes[cursor..cursor + zeros].fill(b'0');
193        cursor += zeros;
194    }
195
196    cursor
197}
198
199/// Write positive float to string without scientific notation.
200///
201/// Has a positive exponent (shift left) and no scientific notation.
202#[inline]
203pub fn write_float_positive_exponent<F: DragonboxFloat, const FORMAT: u128>(
204    bytes: &mut [u8],
205    fp: ExtendedFloat80,
206    sci_exp: i32,
207    options: &Options,
208) -> usize {
209    // Config options.
210    debug_assert!(sci_exp >= 0);
211    debug_assert_eq!(count_factors(10, fp.mant), 0);
212    let decimal_point = options.decimal_point();
213
214    // Write out our significant digits.
215    // Let's be optimistic and try to write without needing to move digits.
216    // This only works if the if the resulting leading digits, or `sci_exp + 1`,
217    // is greater than the written digits. If not, we have to move digits after
218    // and then adjust the decimal point. However, with truncating and remove
219    // trailing zeros, we **don't** know the exact digit count **yet**.
220    let digit_count = F::write_digits(bytes, fp.mant);
221    let (mut digit_count, carried) =
222        shared::truncate_and_round_decimal(bytes, digit_count, options);
223
224    // Now, check if we have shift digits.
225    let leading_digits = sci_exp as usize + 1 + carried as usize;
226    let mut cursor: usize;
227    let mut trimmed = false;
228    if leading_digits >= digit_count {
229        // Great: we have more leading digits than we wrote, can write trailing zeros
230        // and an optional decimal point.
231        bytes[digit_count..leading_digits].fill(b'0');
232        cursor = leading_digits;
233        digit_count = leading_digits;
234        // Only write decimal point if we're not trimming floats.
235        if !options.trim_floats() {
236            bytes[cursor] = decimal_point;
237            cursor += 1;
238            bytes[cursor] = b'0';
239            cursor += 1;
240            digit_count += 1;
241        } else {
242            trimmed = true;
243        }
244    } else {
245        // Need to shift digits internally, and write the decimal point.
246        // First, move the digits right by 1 after leading digits.
247        let count = digit_count - leading_digits;
248        let buf = &mut bytes[leading_digits..digit_count + 1];
249        assert!(buf.len() > count);
250        for i in (0..count).rev() {
251            buf[i + 1] = buf[i];
252        }
253
254        // Now, write the decimal point.
255        bytes[leading_digits] = decimal_point;
256        cursor = digit_count + 1;
257    }
258
259    // Determine the exact number of digits to write.
260    // Don't worry if we carried: we cannot write **MORE** digits if we've
261    // already previously truncated the input.
262    let exact_count = shared::min_exact_digits(digit_count, options);
263
264    // Change the number of digits written, if we need to add more or trim digits.
265    if !trimmed && exact_count > digit_count {
266        // Check if we need to write more trailing digits.
267        let zeros = exact_count - digit_count;
268        bytes[cursor..cursor + zeros].fill(b'0');
269        cursor += zeros;
270    }
271
272    cursor
273}
274
275// ALGORITHM
276// ---------
277
278/// Get an extended representation of the decimal float.
279///
280/// The returned float has a decimal exponent, and the significant digits
281/// returned to the nearest mantissa. For example, `1.5f32` will return
282/// `ExtendedFloat80 { mant: 15, exp: -1 }`, although trailing zeros
283/// might not be removed.
284///
285/// This algorithm **only** fails when `float == 0.0`, and we want to
286/// short-circuit anyway.
287#[inline(always)]
288pub fn to_decimal<F: RawFloat>(float: F) -> ExtendedFloat80 {
289    let bits = float.to_bits();
290    let mantissa_bits = bits & F::MANTISSA_MASK;
291
292    if (bits & !F::SIGN_MASK).as_u64() == 0 {
293        return extended_float(0, 0);
294    }
295
296    // Shorter interval case; proceed like Schubfach.
297    // One might think this condition is wrong, since when `exponent_bits == 1`
298    // and `two_fc == 0`, the interval is actually regular. However, it turns out
299    // that this seemingly wrong condition is actually fine, because the end
300    // result is anyway the same.
301    //
302    // [binary32]
303    // (fc-1/2) * 2^e = 1.175'494'28... * 10^-38
304    // (fc-1/4) * 2^e = 1.175'494'31... * 10^-38
305    //    fc    * 2^e = 1.175'494'35... * 10^-38
306    // (fc+1/2) * 2^e = 1.175'494'42... * 10^-38
307    //
308    // Hence, `shorter_interval_case` will return 1.175'494'4 * 10^-38.
309    // 1.175'494'3 * 10^-38 is also a correct shortest representation that will
310    // be rejected if we assume shorter interval, but 1.175'494'4 * 10^-38 is
311    // closer to the true value so it doesn't matter.
312    //
313    // [binary64]
314    // (fc-1/2) * 2^e = 2.225'073'858'507'201'13... * 10^-308
315    // (fc-1/4) * 2^e = 2.225'073'858'507'201'25... * 10^-308
316    //    fc    * 2^e = 2.225'073'858'507'201'38... * 10^-308
317    // (fc+1/2) * 2^e = 2.225'073'858'507'201'63... * 10^-308
318    //
319    // Hence, `shorter_interval_case` will return 2.225'073'858'507'201'4 *
320    // 10^-308. This is indeed of the shortest length, and it is the unique one
321    // closest to the true value among valid representations of the same length.
322
323    // Toward zero case:
324    //
325    // What we need is a compute-nearest, but with truncated digits in the
326    // truncated case. Note that we don't need the left-closed direct
327    // rounding case of `I = [w,w+)`, or right-closed directed rounding
328    // case of `I = (w−,w]`, since these produce the shortest intervals for
329    // a **float parser** assuming the rounding of the float-parser.
330    // The left-directed case assumes the float parser will round-down,
331    // while the right-directed case assumed the float parser will round-up.
332    //
333    // A few examples of this behavior is described here:
334    //    **compute_nearest_normal**
335    //
336    //    - `1.23456 => (123456, -5)` for binary32.
337    //    - `1.23456 => (123456, -5)` for binary64.
338    //    - `13.9999999999999982236431606 => (13999999999999998, -15)` for binary64.
339    //
340    //     **compute_left_closed_directed**
341    //
342    //    - `1.23456 => (12345601, -7)` for binary32.
343    //    - `1.23456 => (12345600000000002, -16)` for binary64.
344    //    - `13.9999999999999982236431606 => (13999999999999999, -15)` for binary64.
345    //
346    //     **compute_right_closed_directed**
347    //
348    //    - `1.23456 => (123456, -5)` for binary32.
349    //    - `1.23456 => (123456, -5)` for binary64.
350    //    - `13.9999999999999982236431606 => (13999999999999982, -15)` for binary64.
351
352    if mantissa_bits.as_u64() == 0 {
353        compute_round_short(float)
354    } else {
355        compute_round(float)
356    }
357}
358
359/// Compute for a simple case when rounding nearest, tie-even.
360#[inline(always)]
361pub fn compute_round_short<F: RawFloat>(float: F) -> ExtendedFloat80 {
362    compute_nearest_shorter(float)
363}
364
365/// Compute for a non-simple case when rounding nearest, tie-even.
366#[inline(always)]
367pub fn compute_round<F: RawFloat>(float: F) -> ExtendedFloat80 {
368    compute_nearest_normal(float)
369}
370
371/// Compute the interval `I = [m−w,m+w]` if even, otherwise, `(m−w,m+w)`.
372/// This is the simple case for a finite number where only the hidden bit is
373/// set.
374#[inline]
375pub fn compute_nearest_shorter<F: RawFloat>(float: F) -> ExtendedFloat80 {
376    // Compute `k` and `beta`.
377    let exponent = float.exponent();
378    let minus_k = floor_log10_pow2_minus_log10_4_over_3(exponent);
379    let beta = exponent + floor_log2_pow10(-minus_k);
380
381    // Compute `xi` and `zi`.
382    // SAFETY: safe, since value must be finite and therefore in the correct range.
383    // `-324 <= exponent <= 308`, so `x * log10(2) - log10(4 / 3)` must be in
384    // `-98 <= x <= 93`, so the final value must be in `[-93, 98]` (for f64). We
385    // have pre-computed powers for `[-292, 326]` for f64 (same logic applies
386    // for f32) so this is **ALWAYS** safe.
387    let pow5 = unsafe { F::dragonbox_power(-minus_k) };
388    let mut xi = F::compute_left_endpoint(&pow5, beta);
389    let mut zi = F::compute_right_endpoint(&pow5, beta);
390
391    // Get the interval type.
392    // Must be Round since we only use `compute_round` with a round-nearest
393    // direction.
394    let interval_type = IntervalType::Closed;
395
396    // If we don't accept the right endpoint and if the right endpoint is an
397    // integer, decrease it.
398    if !interval_type.include_right_endpoint() && is_right_endpoint::<F>(exponent) {
399        zi -= 1;
400    }
401
402    // If the left endpoint is not an integer, increase it.
403    if !(interval_type.include_left_endpoint() && is_left_endpoint::<F>(exponent)) {
404        xi += 1;
405    }
406
407    // Try bigger divisor.
408    let significand = zi / 10;
409
410    // If succeed, remove trailing zeros if necessary and return.
411    if significand * 10 >= xi {
412        let (mant, exp) = F::process_trailing_zeros(significand, minus_k + 1);
413        return extended_float(mant, exp);
414    }
415
416    // Otherwise, compute the round-up of `y`.
417    let mut significand = F::compute_round_up(&pow5, beta);
418
419    // When tie occurs, choose one of them according to the rule.
420    let bits: i32 = F::MANTISSA_SIZE;
421    let lower_threshold: i32 = -floor_log5_pow2_minus_log5_3(bits + 4) - 2 - bits;
422    let upper_threshold: i32 = -floor_log5_pow2(bits + 2) - 2 - bits;
423
424    let round_down = RoundMode::Round.prefer_round_down(significand);
425    if round_down && exponent >= lower_threshold && exponent <= upper_threshold {
426        significand -= 1;
427    } else if significand < xi {
428        significand += 1;
429    }
430
431    // Ensure we haven't re-assigned `exponent` or `minus_k`, since this
432    // is a massive potential security vulnerability.
433    debug_assert!(float.exponent() == exponent);
434    debug_assert!(minus_k == floor_log10_pow2_minus_log10_4_over_3(exponent));
435
436    extended_float(significand, minus_k)
437}
438
439/// Compute the interval `I = [m−w,m+w]` if even, otherwise, `(m−w,m+w)`.
440/// This is the normal case for a finite number with non-zero significant
441/// digits.
442#[allow(clippy::comparison_chain)] // reason="logical approach for algorithm"
443pub fn compute_nearest_normal<F: RawFloat>(float: F) -> ExtendedFloat80 {
444    let mantissa = float.mantissa().as_u64();
445    let exponent = float.exponent();
446    let is_even = mantissa % 2 == 0;
447
448    // Step 1: Schubfach multiplier calculation
449    // Compute `k` and `beta`.
450    let minus_k = floor_log10_pow2(exponent) - F::KAPPA as i32;
451    // SAFETY: safe, since value must be finite and therefore in the correct range.
452    // `-324 <= exponent <= 308`, so `x * log10(2)` must be in
453    // `-98 <= x <= 93`, so the final value must be in `[-93, 98]` (for f64). We
454    // have pre-computed powers for `[-292, 326]` for f64 (same logic applies
455    // for f32) so this is **ALWAYS** safe.
456    let pow5 = unsafe { F::dragonbox_power(-minus_k) };
457    let beta = exponent + floor_log2_pow10(-minus_k);
458
459    // Compute `zi` and `deltai`.
460    // `10^kappa <= deltai < 10^(kappa + 1)`
461    let two_fc = mantissa << 1;
462    let deltai = F::compute_delta(&pow5, beta);
463    // For the case of binary32, the result of integer check is not correct for
464    // `29711844 * 2^-82
465    // = 6.1442653300000000008655037797566933477355632930994033813476... * 10^-18`
466    // and `29711844 * 2^-81
467    // = 1.2288530660000000001731007559513386695471126586198806762695... * 10^-17`,
468    // and they are the unique counterexamples. However, since `29711844` is even,
469    // this does not cause any problem for the endpoints calculations; it can only
470    // cause a problem when we need to perform integer check for the center.
471    // Fortunately, with these inputs, that branch is never executed, so we are
472    // fine.
473    let (zi, is_z_integer) = F::compute_mul((two_fc | 1) << beta, &pow5);
474
475    // Step 2: Try larger divisor; remove trailing zeros if necessary
476    let big_divisor = pow32(10, F::KAPPA + 1);
477    let small_divisor = pow32(10, F::KAPPA);
478
479    // Using an upper bound on `zi`, we might be able to optimize the division
480    // better than the compiler; we are computing `zi / big_divisor` here.
481    let exp = F::KAPPA + 1;
482    let n_max = (1 << (F::MANTISSA_SIZE + 1)) * big_divisor as u64 - 1;
483    let mut significand = F::divide_by_pow10(zi, exp, n_max);
484    let mut r = (zi - (big_divisor as u64).wrapping_mul(significand)) as u32;
485
486    // Get the interval type.
487    // Must be Round since we only use `compute_round` with a round-nearest
488    // direction.
489    let interval_type = IntervalType::Symmetric(is_even);
490
491    // Check for short-circuit.
492    // We use this, since the `goto` statements in dragonbox are unidiomatic
493    // in Rust and lead to unmaintainable code. Using a simple closure is much
494    // simpler, however, we do store a boolean in some cases to determine
495    // if we need to short-circuit.
496    let mut should_short_circuit = true;
497    if r < deltai {
498        // Exclude the right endpoint if necessary.
499        let include_right = interval_type.include_right_endpoint();
500        if r == 0 && !include_right && is_z_integer {
501            significand -= 1;
502            r = big_divisor;
503            should_short_circuit = false;
504        }
505    } else if r > deltai {
506        should_short_circuit = false;
507    } else {
508        // `r == deltai`; compare fractional parts.
509        // Due to the more complex logic in the new dragonbox algorithm,
510        // it's much easier logically to store if we should short circuit,
511        // the default, and only mark
512        let two_fl = two_fc - 1;
513        let include_left = interval_type.include_left_endpoint();
514
515        if !include_left || exponent < F::FC_PM_HALF_LOWER || exponent > F::DIV_BY_5_THRESHOLD {
516            // If the left endpoint is not included, the condition for
517            // success is `z^(f) < delta^(f)` (odd parity).
518            // Otherwise, the inequalities on exponent ensure that
519            // `x` is not an integer, so if `z^(f) >= delta^(f)` (even parity), we in fact
520            // have strict inequality.
521            let parity = F::compute_mul_parity(two_fl, &pow5, beta).0;
522            if !parity {
523                should_short_circuit = false;
524            }
525        } else {
526            let (xi_parity, x_is_integer) = F::compute_mul_parity(two_fl, &pow5, beta);
527            if !xi_parity && !x_is_integer {
528                should_short_circuit = false;
529            }
530        }
531    }
532
533    if should_short_circuit {
534        // Short-circuit case.
535        let (mant, exp) = F::process_trailing_zeros(significand, minus_k + F::KAPPA as i32 + 1);
536        extended_float(mant, exp)
537    } else {
538        // Step 3: Find the significand with the smaller divisor
539        significand *= 10;
540
541        let dist = r - (deltai / 2) + (small_divisor / 2);
542        let approx_y_parity = ((dist ^ (small_divisor / 2)) & 1) != 0;
543
544        // Is dist divisible by `10^kappa`?
545        let (dist, is_dist_div_by_kappa) = F::check_div_pow10(dist);
546
547        // Add `dist / 10^kappa` to the significand.
548        significand += dist as u64;
549
550        if is_dist_div_by_kappa {
551            // Check `z^(f) >= epsilon^(f)`.
552            // We have either `yi == zi - epsiloni` or `yi == (zi - epsiloni) - 1`,
553            // where `yi == zi - epsiloni` if and only if `z^(f) >= epsilon^(f)`.
554            // Since there are only 2 possibilities, we only need to care about the
555            // parity. Also, `zi` and `r` should have the same parity since the divisor is
556            // an even number.
557            let (yi_parity, is_y_integer) = F::compute_mul_parity(two_fc, &pow5, beta);
558            let round_down = RoundMode::Round.prefer_round_down(significand);
559
560            if yi_parity != approx_y_parity || (is_y_integer && round_down) {
561                // If `z^(f) >= epsilon^(f)`, we might have a tie
562                // when `z^(f) == epsilon^(f)`, or equivalently, when `y` is an integer.
563                // For tie-to-up case, we can just choose the upper one.
564                significand -= 1;
565            }
566        }
567
568        // Ensure we haven't re-assigned `exponent` or `minus_k`, since this
569        // is a massive potential security vulnerability.
570        debug_assert!(float.exponent() == exponent);
571        debug_assert!(minus_k == floor_log10_pow2(exponent) - F::KAPPA as i32);
572
573        extended_float(significand, minus_k + F::KAPPA as i32)
574    }
575}
576
577/// Compute the interval `I = [w,w+)`.
578#[allow(clippy::comparison_chain)] // reason="logical approach for algorithm"
579pub fn compute_left_closed_directed<F: RawFloat>(float: F) -> ExtendedFloat80 {
580    let mantissa = float.mantissa().as_u64();
581    let exponent = float.exponent();
582
583    // Step 1: Schubfach multiplier calculation
584    // Compute `k` and `beta`.
585    let minus_k = floor_log10_pow2(exponent) - F::KAPPA as i32;
586    // SAFETY: safe, since value must be finite and therefore in the correct range.
587    // `-324 <= exponent <= 308`, so `x * log10(2)` must be in `[-98, 93]` (for
588    // f64). We have pre-computed powers for `[-292, 326]` for f64 (same logic
589    // applies for f32) so this is **ALWAYS** safe.
590    let pow5 = unsafe { F::dragonbox_power(-minus_k) };
591    let beta = exponent + floor_log2_pow10(-minus_k);
592
593    // Compute `zi` and `deltai`.
594    // `10^kappa <= deltai < 10^(kappa + 1)`
595    let two_fc = mantissa << 1;
596    let deltai = F::compute_delta(&pow5, beta);
597    let (mut xi, mut is_x_integer) = F::compute_mul(two_fc << beta, &pow5);
598
599    // Deal with the unique exceptional cases
600    // `29711844 * 2^-82
601    // = 6.1442653300000000008655037797566933477355632930994033813476... * 10^-18`
602    // and `29711844 * 2^-81
603    // = 1.2288530660000000001731007559513386695471126586198806762695... * 10^-17`
604    // for binary32.
605    if F::BITS == 32 && exponent <= -80 {
606        is_x_integer = false;
607    }
608
609    if !is_x_integer {
610        xi += 1;
611    }
612
613    // Step 2: Try larger divisor; remove trailing zeros if necessary
614    let big_divisor = pow32(10, F::KAPPA + 1);
615
616    // Using an upper bound on `xi`, we might be able to optimize the division
617    // better than the compiler; we are computing `xi / big_divisor` here.
618    let exp = F::KAPPA + 1;
619    let n_max = (1 << (F::MANTISSA_SIZE + 1)) * big_divisor as u64 - 1;
620    let mut significand = F::divide_by_pow10(xi, exp, n_max);
621    let mut r = (xi - (big_divisor as u64).wrapping_mul(significand)) as u32;
622
623    if r != 0 {
624        significand += 1;
625        r = big_divisor - r;
626    }
627
628    // Check for short-circuit.
629    // We use this, since the `goto` statements in dragonbox are unidiomatic
630    // in Rust and lead to unmaintainable code. Using a simple closure is much
631    // simpler, however, we do store a boolean in some cases to determine
632    // if we need to short-circuit.
633    let mut should_short_circuit = true;
634    if r > deltai {
635        should_short_circuit = false;
636    } else if r == deltai {
637        // Compare the fractional parts.
638        // This branch is never taken for the exceptional cases
639        // `2f_c = 29711482, e = -81`
640        // `(6.1442649164096937243516663440523473127541365101933479309082... * 10^-18)`
641        // and `2f_c = 29711482, e = -80`
642        // `(1.2288529832819387448703332688104694625508273020386695861816... * 10^-17)`.
643        let (zi_parity, is_z_integer) = F::compute_mul_parity(two_fc + 2, &pow5, beta);
644        if zi_parity || is_z_integer {
645            should_short_circuit = false;
646        }
647    }
648
649    if should_short_circuit {
650        let (mant, exp) = F::process_trailing_zeros(significand, minus_k + F::KAPPA as i32 + 1);
651        extended_float(mant, exp)
652    } else {
653        // Step 3: Find the significand with the smaller divisor
654        significand *= 10;
655        significand -= F::div_pow10(r) as u64;
656
657        // Ensure we haven't re-assigned `exponent` or `minus_k`, since this
658        // is a massive potential security vulnerability.
659        debug_assert!(float.exponent() == exponent);
660        debug_assert!(minus_k == floor_log10_pow2(exponent) - F::KAPPA as i32);
661
662        extended_float(significand, minus_k + F::KAPPA as i32)
663    }
664}
665
666/// Compute the interval `I = (w−,w]`.
667#[allow(clippy::comparison_chain, clippy::if_same_then_else)] // reason="logical approach for algorithm"
668pub fn compute_right_closed_directed<F: RawFloat>(float: F, shorter: bool) -> ExtendedFloat80 {
669    // ensure our floats have a maximum exp in the range [-324, 308].
670    assert!(F::BITS <= 64, "cannot guarantee safety invariants with 128-bit floats");
671
672    let mantissa = float.mantissa().as_u64();
673    let exponent = float.exponent();
674
675    // Step 1: Schubfach multiplier calculation
676    // Exponent must be in the range `[-324, 308]`
677    // Compute `k` and `beta`.
678    let minus_k = floor_log10_pow2(exponent - shorter as i32) - F::KAPPA as i32;
679    assert!(F::KAPPA <= 2);
680    // SAFETY: safe, since value must be finite and therefore in the correct range.
681    // `-324 <= exponent <= 308`, so `x * log10(2)` must be in [-100, 92] (for f64).
682    // We have pre-computed powers for [-292, 326] for f64 (same logic applies for
683    // f32) so this is **ALWAYS** safe.
684    let pow5: <F as DragonboxFloat>::Power = unsafe { F::dragonbox_power(-minus_k) };
685    let beta = exponent + floor_log2_pow10(-minus_k);
686
687    // Compute `zi` and `deltai`.
688    // `10^kappa <= deltai < 10^(kappa + 1)`
689    let two_fc = mantissa << 1;
690    let deltai = F::compute_delta(&pow5, beta - shorter as i32);
691    let zi = F::compute_mul(two_fc << beta, &pow5).0;
692
693    // Step 2: Try larger divisor; remove trailing zeros if necessary
694    let big_divisor = pow32(10, F::KAPPA + 1);
695
696    // Using an upper bound on `zi`, we might be able to optimize the division
697    // better than the compiler; we are computing `zi / big_divisor` here.
698    let exp = F::KAPPA + 1;
699    let n_max = (1 << (F::MANTISSA_SIZE + 1)) * big_divisor as u64 - 1;
700    let mut significand = F::divide_by_pow10(zi, exp, n_max);
701    let r = (zi - (big_divisor as u64).wrapping_mul(significand)) as u32;
702
703    // Check for short-circuit.
704    // We use this, since the `goto` statements in dragonbox are unidiomatic
705    // in Rust and lead to unmaintainable code. Using a simple closure is much
706    // simpler, however, we do store a boolean in some cases to determine
707    // if we need to short-circuit.
708    let mut should_short_circuit = true;
709    if r > deltai {
710        should_short_circuit = false;
711    } else if r == deltai {
712        // Compare the fractional parts.
713        let two_f = two_fc
714            - if shorter {
715                1
716            } else {
717                2
718            };
719        if !F::compute_mul_parity(two_f, &pow5, beta).0 {
720            should_short_circuit = false;
721        }
722    }
723
724    if should_short_circuit {
725        let (mant, exp) = F::process_trailing_zeros(significand, minus_k + F::KAPPA as i32 + 1);
726        extended_float(mant, exp)
727    } else {
728        // Step 3: Find the significand with the smaller divisor
729        significand *= 10;
730        significand -= F::div_pow10(r) as u64;
731
732        // Ensure we haven't re-assigned `exponent` or `minus_k`.
733        assert!(float.exponent() == exponent);
734        debug_assert!(
735            minus_k == floor_log10_pow2(float.exponent() - shorter as i32) - F::KAPPA as i32
736        );
737
738        extended_float(significand, minus_k + F::KAPPA as i32)
739    }
740}
741
742// DIGITS
743// ------
744
745// NOTE: Dragonbox has a heavily-branched, dubiously optimized algorithm using
746// fast division, that leads to no practical performance benefits in my
747// benchmarks, and the division algorithm is at best ~3% faster. It also tries
748// to avoid writing digits extensively, but requires division operations for
749// each step regardless, which means the **actual** overhead of said branching
750// likely exceeds any benefits. The code is also impossible to maintain, and in
751// my benchmarks is slower (by a small amount) for a 32-bit mantissa, and a
752// **lot** slower for a 64-bit mantissa, where we need to trim trailing zeros.
753
754/// Write the significant digits, when the significant digits can fit in a
755/// 32-bit integer. `log10(2**32-1) < 10`, so 10 digits is always enough.
756///
757/// Returns the number of digits written. This assumes any trailing zeros have
758/// been removed.
759#[inline(always)]
760#[allow(clippy::branches_sharing_code)] // reason="could differentiate later"
761pub fn write_digits_u32(bytes: &mut [u8], mantissa: u32) -> usize {
762    debug_assert!(bytes.len() >= 10);
763    mantissa.decimal(bytes)
764}
765
766/// Write the significant digits, when the significant digits cannot fit in a
767/// 32-bit integer.
768///
769/// Returns the number of digits written. Note that this might not be the
770/// same as the number of digits in the mantissa, since trailing zeros will
771/// be removed. `log10(2**64-1) < 20`, so 20 digits is always enough.
772#[inline(always)]
773#[allow(clippy::branches_sharing_code)] // reason="could differentiate later"
774pub fn write_digits_u64(bytes: &mut [u8], mantissa: u64) -> usize {
775    debug_assert!(bytes.len() >= 20);
776    mantissa.decimal(bytes)
777}
778
779// EXTENDED
780// --------
781
782/// Create extended float from significant digits and exponent.
783#[inline(always)]
784pub const fn extended_float(mant: u64, exp: i32) -> ExtendedFloat80 {
785    ExtendedFloat80 {
786        mant,
787        exp,
788    }
789}
790
791// COMPUTE
792// -------
793
794#[inline(always)]
795pub const fn floor_log2(mut n: u64) -> i32 {
796    let mut count = -1;
797    while n != 0 {
798        count += 1;
799        n >>= 1;
800    }
801    count
802}
803
804#[inline(always)]
805pub const fn is_endpoint(exponent: i32, lower: i32, upper: i32) -> bool {
806    exponent >= lower && exponent <= upper
807}
808
809#[inline(always)]
810pub fn is_right_endpoint<F: Float>(exponent: i32) -> bool {
811    let lower_threshold = 0;
812    let factors = count_factors(5, (1u64 << (F::MANTISSA_SIZE + 1)) + 1) + 1;
813    let upper_threshold = 2 + floor_log2(pow64(10, factors) / 3);
814    is_endpoint(exponent, lower_threshold, upper_threshold)
815}
816
817#[inline(always)]
818pub fn is_left_endpoint<F: Float>(exponent: i32) -> bool {
819    let lower_threshold = 2;
820    let factors = count_factors(5, (1u64 << (F::MANTISSA_SIZE + 2)) - 1) + 1;
821    let upper_threshold = 2 + floor_log2(pow64(10, factors) / 3);
822    is_endpoint(exponent, lower_threshold, upper_threshold)
823}
824
825// MUL
826// ---
827
828#[inline(always)]
829pub const fn umul128_upper64(x: u64, y: u64) -> u64 {
830    let p = x as u128 * y as u128;
831    (p >> 64) as u64
832}
833
834#[inline(always)]
835pub const fn umul192_upper128(x: u64, hi: u64, lo: u64) -> (u64, u64) {
836    let mut r = x as u128 * hi as u128;
837    r += umul128_upper64(x, lo) as u128;
838    ((r >> 64) as u64, r as u64)
839}
840
841#[inline(always)]
842pub const fn umul192_lower128(x: u64, yhi: u64, ylo: u64) -> (u64, u64) {
843    let hi = x.wrapping_mul(yhi);
844    let hi_lo = x as u128 * ylo as u128;
845    // NOTE: This can wrap exactly to 0, and this is desired.
846    (hi.wrapping_add((hi_lo >> 64) as u64), hi_lo as u64)
847}
848
849#[inline(always)]
850pub const fn umul96_upper64(x: u64, y: u64) -> u64 {
851    umul128_upper64(x << 32, y)
852}
853
854#[inline(always)]
855pub const fn umul96_lower64(x: u64, y: u64) -> u64 {
856    x.wrapping_mul(y)
857}
858
859// LOG
860// ---
861
862/// Calculate `x * log5(2)` quickly.
863/// Generated by `etc/log.py`.
864/// Only needs to be valid for values from `[-1492, 1492]`
865#[inline(always)]
866pub const fn floor_log5_pow2(q: i32) -> i32 {
867    q.wrapping_mul(225799) >> 19
868}
869
870/// Calculate `x * log10(2)` quickly.
871/// Generated by `etc/log.py`.
872/// Only needs to be valid for values from `[-1700, 1700]`
873#[inline(always)]
874pub const fn floor_log10_pow2(q: i32) -> i32 {
875    q.wrapping_mul(315653) >> 20
876}
877
878/// Calculate `x * log2(10)` quickly.
879/// Generated by `etc/log.py`.
880/// Only needs to be valid for values from `[-1233, 1233]`
881#[inline(always)]
882pub const fn floor_log2_pow10(q: i32) -> i32 {
883    q.wrapping_mul(1741647) >> 19
884}
885
886/// Calculate `x * log5(2) - log5(3)` quickly.
887/// Generated by `etc/log.py`.
888/// Only needs to be valid for values from `[-2427, 2427]`
889#[inline(always)]
890pub const fn floor_log5_pow2_minus_log5_3(q: i32) -> i32 {
891    q.wrapping_mul(451597).wrapping_sub(715764) >> 20
892}
893
894/// Calculate `(x * log10(2) - log10(4 / 3))` quickly.
895/// Generated by `etc/log.py`.
896/// Only needs to be valid for values from `[-1700, 1700]`
897#[inline(always)]
898pub const fn floor_log10_pow2_minus_log10_4_over_3(q: i32) -> i32 {
899    // NOTE: these values aren't actually exact:
900    //      They're off for -295 and 97, so any automated way of computing
901    //      them will also be off.
902    q.wrapping_mul(1262611).wrapping_sub(524031) >> 22
903}
904
905// POW
906// ---
907
908/// const fn to calculate `radix^exp`.
909#[inline(always)]
910pub const fn pow32(radix: u32, mut exp: u32) -> u32 {
911    let mut p = 1;
912    while exp > 0 {
913        p *= radix;
914        exp -= 1;
915    }
916    p
917}
918
919/// const fn to calculate `radix^exp`.
920#[inline(always)]
921pub const fn pow64(radix: u32, mut exp: u32) -> u64 {
922    let mut p = 1;
923    while exp > 0 {
924        p *= radix as u64;
925        exp -= 1;
926    }
927    p
928}
929
930/// Counter the number of powers of radix are in `n`.
931#[inline(always)]
932pub const fn count_factors(radix: u32, mut n: u64) -> u32 {
933    let mut c = 0;
934    while n != 0 && n % radix as u64 == 0 {
935        n /= radix as u64;
936        c += 1;
937    }
938    c
939}
940
941// DIV
942// ---
943
944// Compute `floor(n / 10^exp)` for small exp.
945// Precondition: `exp >= 0.`
946#[inline(always)]
947pub const fn divide_by_pow10_32(n: u32, exp: u32) -> u32 {
948    // Specialize for 32-bit division by 100.
949    // Compiler is supposed to generate the identical code for just writing
950    // `n / 100`, but for some reason MSVC generates an inefficient code
951    // (mul + mov for no apparent reason, instead of single imul),
952    // so we does this manually.
953    if exp == 2 {
954        ((n as u64 * 1374389535) >> 37) as u32
955    } else {
956        let divisor = pow32(exp, 10);
957        n / divisor
958    }
959}
960
961// Compute `floor(n / 10^exp)` for small exp.
962// Precondition: `n <= n_max`
963#[inline(always)]
964pub const fn divide_by_pow10_64(n: u64, exp: u32, n_max: u64) -> u64 {
965    // Specialize for 64-bit division by 1000.
966    // Ensure that the correctness condition is met.
967    if exp == 3 && n_max <= 15534100272597517998 {
968        umul128_upper64(n, 2361183241434822607) >> 7
969    } else {
970        let divisor = pow64(exp, 10);
971        n / divisor
972    }
973}
974
975// ROUNDING
976// --------
977
978impl RoundMode {
979    /// Determine if we should round down.
980    #[inline(always)]
981    pub const fn prefer_round_down(&self, significand: u64) -> bool {
982        match self {
983            RoundMode::Round => significand % 2 != 0,
984            RoundMode::Truncate => true,
985        }
986    }
987}
988
989// INTERVAL TYPE
990// -------------
991
992/// Interval types for rounding modes to compute endpoints.
993#[non_exhaustive]
994pub enum IntervalType {
995    Symmetric(bool),
996    Asymmetric(bool),
997    Closed,
998    Open,
999    LeftClosedRightOpen,
1000    RightClosedLeftOpen,
1001}
1002
1003impl IntervalType {
1004    /// Determine if the interval type is symmetric.
1005    #[inline(always)]
1006    pub fn is_symmetric(&self) -> bool {
1007        match self {
1008            Self::Symmetric(_) => true,
1009            Self::Asymmetric(_) => false,
1010            Self::Closed => true,
1011            Self::Open => true,
1012            Self::LeftClosedRightOpen => false,
1013            Self::RightClosedLeftOpen => false,
1014        }
1015    }
1016
1017    /// Determine if we include the left endpoint.
1018    #[inline(always)]
1019    pub fn include_left_endpoint(&self) -> bool {
1020        match self {
1021            Self::Symmetric(closed) => *closed,
1022            Self::Asymmetric(left_closed) => *left_closed,
1023            Self::Closed => true,
1024            Self::Open => false,
1025            Self::LeftClosedRightOpen => true,
1026            Self::RightClosedLeftOpen => false,
1027        }
1028    }
1029
1030    /// Determine if we include the right endpoint.
1031    #[inline(always)]
1032    pub fn include_right_endpoint(&self) -> bool {
1033        match self {
1034            Self::Symmetric(closed) => *closed,
1035            Self::Asymmetric(left_closed) => !*left_closed,
1036            Self::Closed => true,
1037            Self::Open => false,
1038            Self::LeftClosedRightOpen => false,
1039            Self::RightClosedLeftOpen => true,
1040        }
1041    }
1042}
1043
1044// ENDPOINTS
1045// ---------
1046
1047/// Compute the left endpoint from a 64-bit power-of-5.
1048#[inline(always)]
1049pub fn compute_left_endpoint_u64<F: DragonboxFloat>(pow5: u64, beta: i32) -> u64 {
1050    let zero_carry = pow5 >> (F::MANTISSA_SIZE as usize + 2);
1051    let mantissa_shift = 64 - F::MANTISSA_SIZE as usize - 1;
1052    (pow5 - zero_carry) >> (mantissa_shift as i32 - beta)
1053}
1054
1055#[inline(always)]
1056pub fn compute_right_endpoint_u64<F: DragonboxFloat>(pow5: u64, beta: i32) -> u64 {
1057    let zero_carry = pow5 >> (F::MANTISSA_SIZE as usize + 1);
1058    let mantissa_shift = 64 - F::MANTISSA_SIZE as usize - 1;
1059    (pow5 + zero_carry) >> (mantissa_shift as i32 - beta)
1060}
1061
1062/// Determine if we should round up for the short interval case.
1063#[inline(always)]
1064pub fn compute_round_up_u64<F: DragonboxFloat>(pow5: u64, beta: i32) -> u64 {
1065    let shift = 64 - F::MANTISSA_SIZE - 2;
1066    ((pow5 >> (shift - beta)) + 1) / 2
1067}
1068
1069// DRAGONBOX FLOAT
1070// ---------------
1071
1072/// Get the high bits from the power-of-5.
1073#[inline(always)]
1074pub const fn high(pow5: &(u64, u64)) -> u64 {
1075    pow5.0
1076}
1077
1078/// Get the low bits from the power-of-5.
1079#[inline(always)]
1080pub const fn low(pow5: &(u64, u64)) -> u64 {
1081    pow5.1
1082}
1083
1084/// ROR instruction for 32-bit type.
1085#[inline(always)]
1086pub const fn rotr32(n: u32, r: u32) -> u32 {
1087    let r = r & 31;
1088    (n >> r) | (n << (32 - r))
1089}
1090
1091/// ROR instruction for 64-bit type.
1092#[inline(always)]
1093pub const fn rotr64(n: u64, r: u64) -> u64 {
1094    let r = r & 63;
1095    (n >> r) | (n << (64 - r))
1096}
1097
1098/// Magic numbers for division by a power of 10.
1099/// Replace `n` by `floor(n / 10^N)`.
1100/// Returns true if and only if n is divisible by `10^N`.
1101/// Precondition: `n <= 10^(N+1)`
1102/// !!It takes an in-out parameter!!
1103struct Div10Info {
1104    magic_number: u32,
1105    shift_amount: i32,
1106}
1107
1108const F32_DIV10_INFO: Div10Info = Div10Info {
1109    magic_number: 6554,
1110    shift_amount: 16,
1111};
1112
1113const F64_DIV10_INFO: Div10Info = Div10Info {
1114    magic_number: 656,
1115    shift_amount: 16,
1116};
1117
1118macro_rules! check_div_pow10 {
1119    ($n:ident, $exp:literal, $float:ident, $info:ident) => {{
1120        // Make sure the computation for `max_n` does not overflow.
1121        debug_assert!($exp + 2 < floor_log10_pow2(31));
1122        debug_assert!($n as u64 <= pow64(10, $exp + 1));
1123
1124        let n = $n.wrapping_mul($info.magic_number);
1125        let mask = (1u32 << $info.shift_amount) - 1;
1126        let r = (n & mask) < $info.magic_number;
1127
1128        (n >> $info.shift_amount, r)
1129    }};
1130}
1131
1132// These constants are efficient because we can do it in 32-bits.
1133const MOD_INV_5_U32: u32 = 0xCCCC_CCCD;
1134const MOD_INV_25_U32: u32 = MOD_INV_5_U32.wrapping_mul(MOD_INV_5_U32);
1135const MOD_INV_5_U64: u64 = 0xCCCC_CCCC_CCCC_CCCD;
1136const MOD_INV_25_U64: u64 = MOD_INV_5_U64.wrapping_mul(MOD_INV_5_U64);
1137
1138macro_rules! div_pow10 {
1139    ($n:ident, $info:ident) => {{
1140        $n.wrapping_mul($info.magic_number) >> $info.shift_amount
1141    }};
1142}
1143
1144/// Trait with specialized methods for the Dragonbox algorithm.
1145pub trait DragonboxFloat: Float {
1146    /// Constant derived in Section 4.5 of the Dragonbox algorithm.
1147    const KAPPA: u32;
1148    /// Ceiling of the maximum number of float decimal digits + 1.
1149    /// Or, `ceil((MANTISSA_SIZE + 1) / log2(10)) + 1`.
1150    const DECIMAL_DIGITS: usize;
1151    const FC_PM_HALF_LOWER: i32 = -(Self::KAPPA as i32) - floor_log5_pow2(Self::KAPPA as i32);
1152    const DIV_BY_5_THRESHOLD: i32 = floor_log2_pow10(Self::KAPPA as i32 + 1);
1153
1154    type Power;
1155
1156    /// Quick calculation for the number of significant digits in the float.
1157    fn digit_count(mantissa: u64) -> usize;
1158
1159    /// Write the significant digits to a buffer.
1160    ///
1161    /// Does not handle rounding or truncated digits.
1162    fn write_digits(bytes: &mut [u8], mantissa: u64) -> usize;
1163
1164    /// Get the pre-computed Dragonbox power from the exponent.
1165    ///
1166    /// # Safety
1167    ///
1168    /// Safe as long as the exponent is within the valid power-of-5 range.
1169    unsafe fn dragonbox_power(exponent: i32) -> Self::Power;
1170
1171    /// Compute the left endpoint for the shorter interval case.
1172    fn compute_left_endpoint(pow5: &Self::Power, beta_minus_1: i32) -> u64;
1173
1174    /// Compute the right endpoint for the shorter interval case.
1175    fn compute_right_endpoint(pow5: &Self::Power, beta_minus_1: i32) -> u64;
1176
1177    /// Handle rounding-up for the short interval case.
1178    fn compute_round_up(pow5: &Self::Power, beta_minus_1: i32) -> u64;
1179
1180    fn compute_mul(u: u64, pow5: &Self::Power) -> (u64, bool);
1181    fn compute_mul_parity(two_f: u64, pow5: &Self::Power, beta_minus_1: i32) -> (bool, bool);
1182    fn compute_delta(pow5: &Self::Power, beta_minus_1: i32) -> u32;
1183
1184    /// Handle trailing zeros, conditional on the float type.
1185    fn process_trailing_zeros(mantissa: u64, exponent: i32) -> (u64, i32);
1186
1187    /// Remove trailing zeros from the float.
1188    fn remove_trailing_zeros(mantissa: u64) -> (u64, i32);
1189
1190    /// Determine if `two_f` is divisible by `2^exp`.
1191    #[inline(always)]
1192    fn divisible_by_pow2(x: u64, exp: u32) -> bool {
1193        // Preconditions: `exp >= 1 && x != 0`
1194        x.trailing_zeros() >= exp
1195    }
1196
1197    // Replace `n` by `floor(n / 10^N)`.
1198    // Returns true if and only if `n` is divisible by `10^N`.
1199    // Precondition: `n <= 10^(N+1)`
1200    fn check_div_pow10(n: u32) -> (u32, bool);
1201
1202    // Compute `floor(n / 10^N)` for small `n` and exp.
1203    // Precondition: `n <= 10^(N+1)`
1204    fn div_pow10(n: u32) -> u32;
1205
1206    // Compute `floor(n / 10^N)` for small `N`.
1207    // Precondition: `n <= n_max`
1208    fn divide_by_pow10(n: u64, exp: u32, n_max: u64) -> u64;
1209}
1210
1211impl DragonboxFloat for f32 {
1212    const KAPPA: u32 = 1;
1213    const DECIMAL_DIGITS: usize = 9;
1214
1215    type Power = u64;
1216
1217    #[inline(always)]
1218    fn digit_count(mantissa: u64) -> usize {
1219        debug_assert!(mantissa <= u32::MAX as u64);
1220        (mantissa as u32).decimal_count()
1221    }
1222
1223    #[inline(always)]
1224    fn write_digits(bytes: &mut [u8], mantissa: u64) -> usize {
1225        // NOTE: These digits are after shifting, so it can be 2**32 - 1.
1226        debug_assert!(mantissa <= u32::MAX as u64);
1227        write_digits_u32(bytes, mantissa as u32)
1228    }
1229
1230    #[inline(always)]
1231    unsafe fn dragonbox_power(exponent: i32) -> Self::Power {
1232        debug_assert!((SMALLEST_F32_POW5..=LARGEST_F32_POW5).contains(&exponent));
1233        let index = (exponent - SMALLEST_F32_POW5) as usize;
1234        // SAFETY: safe if the exponent is in the correct range.
1235        unsafe { index_unchecked!(DRAGONBOX32_POWERS_OF_FIVE[index]) }
1236    }
1237
1238    #[inline(always)]
1239    fn compute_left_endpoint(pow5: &Self::Power, beta_minus_1: i32) -> u64 {
1240        compute_left_endpoint_u64::<Self>(*pow5, beta_minus_1)
1241    }
1242
1243    #[inline(always)]
1244    fn compute_right_endpoint(pow5: &Self::Power, beta_minus_1: i32) -> u64 {
1245        compute_right_endpoint_u64::<Self>(*pow5, beta_minus_1)
1246    }
1247
1248    #[inline(always)]
1249    fn compute_round_up(pow5: &Self::Power, beta_minus_1: i32) -> u64 {
1250        compute_round_up_u64::<Self>(*pow5, beta_minus_1)
1251    }
1252
1253    #[inline(always)]
1254    fn compute_mul(u: u64, pow5: &Self::Power) -> (u64, bool) {
1255        let r = umul96_upper64(u, *pow5);
1256        (r >> 32, (r as u32) == 0)
1257    }
1258
1259    #[inline(always)]
1260    fn compute_mul_parity(two_f: u64, pow5: &Self::Power, beta: i32) -> (bool, bool) {
1261        debug_assert!((1..64).contains(&beta));
1262
1263        let r = umul96_lower64(two_f, *pow5);
1264        let parity = (r >> (64 - beta)) & 1;
1265        let is_integer = r >> (32 - beta);
1266        (parity != 0, is_integer == 0)
1267    }
1268
1269    #[inline(always)]
1270    fn compute_delta(pow5: &Self::Power, beta: i32) -> u32 {
1271        (*pow5 >> (64 - 1 - beta)) as u32
1272    }
1273
1274    #[inline(always)]
1275    fn process_trailing_zeros(mantissa: u64, exponent: i32) -> (u64, i32) {
1276        // Policy is to remove the trailing zeros.
1277        let (mantissa, trailing) = Self::remove_trailing_zeros(mantissa);
1278        (mantissa, exponent + trailing)
1279    }
1280
1281    #[inline(always)]
1282    fn remove_trailing_zeros(mantissa: u64) -> (u64, i32) {
1283        debug_assert!(mantissa <= u32::MAX as u64);
1284        debug_assert!(mantissa != 0);
1285
1286        let mut n = mantissa as u32;
1287        let mut quo: u32;
1288        let mut s: i32 = 0;
1289        loop {
1290            quo = rotr32(n.wrapping_mul(MOD_INV_25_U32), 2);
1291            if quo <= u32::MAX / 100 {
1292                n = quo;
1293                s += 2;
1294            } else {
1295                break;
1296            }
1297        }
1298
1299        quo = rotr32(n.wrapping_mul(MOD_INV_5_U32), 1);
1300        if quo <= u32::MAX / 10 {
1301            n = quo;
1302            s |= 1;
1303        }
1304        (n as u64, s)
1305    }
1306
1307    #[inline(always)]
1308    fn check_div_pow10(n: u32) -> (u32, bool) {
1309        check_div_pow10!(n, 1, f32, F32_DIV10_INFO)
1310    }
1311
1312    #[inline(always)]
1313    fn div_pow10(n: u32) -> u32 {
1314        div_pow10!(n, F32_DIV10_INFO)
1315    }
1316
1317    #[inline(always)]
1318    fn divide_by_pow10(n: u64, exp: u32, _: u64) -> u64 {
1319        divide_by_pow10_32(n as u32, exp) as u64
1320    }
1321}
1322
1323impl DragonboxFloat for f64 {
1324    const KAPPA: u32 = 2;
1325    const DECIMAL_DIGITS: usize = 17;
1326
1327    type Power = (u64, u64);
1328
1329    #[inline(always)]
1330    fn digit_count(mantissa: u64) -> usize {
1331        mantissa.decimal_count()
1332    }
1333
1334    #[inline(always)]
1335    fn write_digits(bytes: &mut [u8], mantissa: u64) -> usize {
1336        // NOTE: These digits are after shifting, so it can be 2**64 - 1.
1337        write_digits_u64(bytes, mantissa)
1338    }
1339
1340    #[inline(always)]
1341    unsafe fn dragonbox_power(exponent: i32) -> Self::Power {
1342        debug_assert!((SMALLEST_F64_POW5..=LARGEST_F64_POW5).contains(&exponent));
1343        let index = (exponent - SMALLEST_F64_POW5) as usize;
1344        // SAFETY: safe if the exponent is in the correct range.
1345        unsafe { index_unchecked!(DRAGONBOX64_POWERS_OF_FIVE[index]) }
1346    }
1347
1348    #[inline(always)]
1349    fn compute_left_endpoint(pow5: &Self::Power, beta_minus_1: i32) -> u64 {
1350        compute_left_endpoint_u64::<Self>(high(pow5), beta_minus_1)
1351    }
1352
1353    #[inline(always)]
1354    fn compute_right_endpoint(pow5: &Self::Power, beta_minus_1: i32) -> u64 {
1355        compute_right_endpoint_u64::<Self>(high(pow5), beta_minus_1)
1356    }
1357
1358    #[inline(always)]
1359    fn compute_round_up(pow5: &Self::Power, beta_minus_1: i32) -> u64 {
1360        compute_round_up_u64::<Self>(high(pow5), beta_minus_1)
1361    }
1362
1363    #[inline(always)]
1364    fn compute_mul(u: u64, pow5: &Self::Power) -> (u64, bool) {
1365        let (hi, lo) = umul192_upper128(u, high(pow5), low(pow5));
1366        (hi, lo == 0)
1367    }
1368
1369    #[inline(always)]
1370    fn compute_mul_parity(two_f: u64, pow5: &Self::Power, beta: i32) -> (bool, bool) {
1371        debug_assert!((1..64).contains(&beta));
1372
1373        let (rhi, rlo) = umul192_lower128(two_f, high(pow5), low(pow5));
1374        let parity = (rhi >> (64 - beta)) & 1;
1375        let is_integer = (rhi << beta) | (rlo >> (64 - beta));
1376        (parity != 0, is_integer == 0)
1377    }
1378
1379    #[inline(always)]
1380    fn compute_delta(pow5: &Self::Power, beta: i32) -> u32 {
1381        (high(pow5) >> (64 - 1 - beta)) as u32
1382    }
1383
1384    #[inline(always)]
1385    fn process_trailing_zeros(mantissa: u64, exponent: i32) -> (u64, i32) {
1386        // Policy is to remove the trailing zeros.
1387        // This differs from dragonbox proper, but leads to faster benchmarks.
1388        let (mantissa, trailing) = Self::remove_trailing_zeros(mantissa);
1389        (mantissa, exponent + trailing)
1390    }
1391
1392    #[inline(always)]
1393    fn remove_trailing_zeros(mantissa: u64) -> (u64, i32) {
1394        debug_assert!(mantissa != 0);
1395
1396        // This magic number is `ceil(2^90 / 10^8)`.
1397        let magic_number = 12379400392853802749u64;
1398        let nm = mantissa as u128 * magic_number as u128;
1399
1400        // Is n is divisible by 10^8?
1401        let high = (nm >> 64) as u64;
1402        let mask = (1 << (90 - 64)) - 1;
1403        let low = nm as u64;
1404        if high & mask == 0 && low < magic_number {
1405            // If yes, work with the quotient.
1406            let mut n = (high >> (90 - 64)) as u32;
1407            let mut s: i32 = 8;
1408            let mut quo: u32;
1409
1410            loop {
1411                quo = rotr32(n.wrapping_mul(MOD_INV_25_U32), 2);
1412                if quo <= u32::MAX / 100 {
1413                    n = quo;
1414                    s += 2;
1415                } else {
1416                    break;
1417                }
1418            }
1419
1420            quo = rotr32(n.wrapping_mul(MOD_INV_5_U32), 1);
1421            if quo <= u32::MAX / 10 {
1422                n = quo;
1423                s |= 1;
1424            }
1425
1426            (n as u64, s)
1427        } else {
1428            // If n is not divisible by 10^8, work with n itself.
1429            let mut n = mantissa;
1430            let mut s: i32 = 0;
1431            let mut quo: u64;
1432
1433            loop {
1434                quo = rotr64(n.wrapping_mul(MOD_INV_25_U64), 2);
1435                if quo <= u64::MAX / 100 {
1436                    n = quo;
1437                    s += 2;
1438                } else {
1439                    break;
1440                }
1441            }
1442
1443            quo = rotr64(n.wrapping_mul(MOD_INV_5_U64), 1);
1444            if quo <= u64::MAX / 10 {
1445                n = quo;
1446                s |= 1;
1447            }
1448
1449            (n, s)
1450        }
1451    }
1452
1453    #[inline(always)]
1454    fn check_div_pow10(n: u32) -> (u32, bool) {
1455        check_div_pow10!(n, 2, f64, F64_DIV10_INFO)
1456    }
1457
1458    #[inline(always)]
1459    fn div_pow10(n: u32) -> u32 {
1460        div_pow10!(n, F64_DIV10_INFO)
1461    }
1462
1463    #[inline(always)]
1464    fn divide_by_pow10(n: u64, exp: u32, n_max: u64) -> u64 {
1465        divide_by_pow10_64(n, exp, n_max)
1466    }
1467}
1468
1469#[cfg(feature = "f16")]
1470macro_rules! dragonbox_unimpl {
1471    ($($t:ident)*) => ($(
1472        impl DragonboxFloat for $t {
1473            const KAPPA: u32 = 0;
1474            const DECIMAL_DIGITS: usize = 0;
1475
1476            type Power = u64;
1477
1478            #[inline(always)]
1479            fn digit_count(_: u64) -> usize {
1480                unimplemented!()
1481            }
1482
1483            #[inline(always)]
1484            fn write_digits(_: &mut [u8], _: u64) -> usize {
1485                unimplemented!()
1486            }
1487
1488            #[inline(always)]
1489            unsafe fn dragonbox_power(_: i32) -> Self::Power {
1490                unimplemented!()
1491            }
1492
1493            #[inline(always)]
1494            fn compute_left_endpoint(_: &Self::Power, _: i32) -> u64 {
1495                unimplemented!()
1496            }
1497
1498            #[inline(always)]
1499            fn compute_right_endpoint(_: &Self::Power, _: i32) -> u64 {
1500                unimplemented!()
1501            }
1502
1503            #[inline(always)]
1504            fn compute_round_up(_: &Self::Power, _: i32) -> (u64, bool) {
1505                unimplemented!()
1506            }
1507
1508            #[inline(always)]
1509            fn compute_mul(_: u64, _: &Self::Power) -> (u64, bool) {
1510                unimplemented!()
1511            }
1512
1513            #[inline(always)]
1514            fn compute_mul_parity(_: u64, _: &Self::Power, _: i32) -> (bool, bool) {
1515                unimplemented!()
1516            }
1517
1518            #[inline(always)]
1519            fn compute_delta(_: &Self::Power, _: i32) -> u32 {
1520                unimplemented!()
1521            }
1522
1523            #[inline(always)]
1524            fn process_trailing_zeros(_: u64, _: i32) -> (u64, i32) {
1525                unimplemented!()
1526            }
1527
1528            #[inline(always)]
1529            fn remove_trailing_zeros(_: u64) -> (u64, i32) {
1530                unimplemented!()
1531            }
1532
1533            #[inline(always)]
1534            fn check_div_pow10(_: u32) -> (u32, bool) {
1535                unimplemented!()
1536            }
1537
1538            #[inline(always)]
1539            fn div_pow10(_: u32) -> u32 {
1540                unimplemented!()
1541            }
1542        }
1543    )*);
1544}
1545
1546#[cfg(feature = "f16")]
1547dragonbox_unimpl! { bf16 f16 }