minimal_lexical/parse.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201
//! Parse byte iterators to float.
#![doc(hidden)]
#[cfg(feature = "compact")]
use crate::bellerophon::bellerophon;
use crate::extended_float::{extended_to_float, ExtendedFloat};
#[cfg(not(feature = "compact"))]
use crate::lemire::lemire;
use crate::num::Float;
use crate::number::Number;
use crate::slow::slow;
/// Try to parse the significant digits quickly.
///
/// This attempts a very quick parse, to deal with common cases.
///
/// * `integer` - Slice containing the integer digits.
/// * `fraction` - Slice containing the fraction digits.
#[inline]
fn parse_number_fast<'a, Iter1, Iter2>(
integer: Iter1,
fraction: Iter2,
exponent: i32,
) -> Option<Number>
where
Iter1: Iterator<Item = &'a u8>,
Iter2: Iterator<Item = &'a u8>,
{
let mut num = Number::default();
let mut integer_count: usize = 0;
let mut fraction_count: usize = 0;
for &c in integer {
integer_count += 1;
let digit = c - b'0';
num.mantissa = num.mantissa.wrapping_mul(10).wrapping_add(digit as u64);
}
for &c in fraction {
fraction_count += 1;
let digit = c - b'0';
num.mantissa = num.mantissa.wrapping_mul(10).wrapping_add(digit as u64);
}
if integer_count + fraction_count <= 19 {
// Can't overflow, since must be <= 19.
num.exponent = exponent.saturating_sub(fraction_count as i32);
Some(num)
} else {
None
}
}
/// Parse the significant digits of the float and adjust the exponent.
///
/// * `integer` - Slice containing the integer digits.
/// * `fraction` - Slice containing the fraction digits.
#[inline]
fn parse_number<'a, Iter1, Iter2>(mut integer: Iter1, mut fraction: Iter2, exponent: i32) -> Number
where
Iter1: Iterator<Item = &'a u8> + Clone,
Iter2: Iterator<Item = &'a u8> + Clone,
{
// NOTE: for performance, we do this in 2 passes:
if let Some(num) = parse_number_fast(integer.clone(), fraction.clone(), exponent) {
return num;
}
// Can only add 19 digits.
let mut num = Number::default();
let mut count = 0;
while let Some(&c) = integer.next() {
count += 1;
if count == 20 {
// Only the integer digits affect the exponent.
num.many_digits = true;
num.exponent = exponent.saturating_add(into_i32(1 + integer.count()));
return num;
} else {
let digit = c - b'0';
num.mantissa = num.mantissa * 10 + digit as u64;
}
}
// Skip leading fraction zeros.
// This is required otherwise we might have a 0 mantissa and many digits.
let mut fraction_count: usize = 0;
if count == 0 {
for &c in &mut fraction {
fraction_count += 1;
if c != b'0' {
count += 1;
let digit = c - b'0';
num.mantissa = num.mantissa * 10 + digit as u64;
break;
}
}
}
for c in fraction {
fraction_count += 1;
count += 1;
if count == 20 {
num.many_digits = true;
// This can't wrap, since we have at most 20 digits.
// We've adjusted the exponent too high by `fraction_count - 1`.
// Note: -1 is due to incrementing this loop iteration, which we
// didn't use.
num.exponent = exponent.saturating_sub(fraction_count as i32 - 1);
return num;
} else {
let digit = c - b'0';
num.mantissa = num.mantissa * 10 + digit as u64;
}
}
// No truncated digits: easy.
// Cannot overflow: <= 20 digits.
num.exponent = exponent.saturating_sub(fraction_count as i32);
num
}
/// Parse float from extracted float components.
///
/// * `integer` - Cloneable, forward iterator over integer digits.
/// * `fraction` - Cloneable, forward iterator over integer digits.
/// * `exponent` - Parsed, 32-bit exponent.
///
/// # Preconditions
/// 1. The integer should not have leading zeros.
/// 2. The fraction should not have trailing zeros.
/// 3. All bytes in `integer` and `fraction` should be valid digits,
/// in the range [`b'0', b'9'].
///
/// # Panics
///
/// Although passing garbage input will not cause memory safety issues,
/// it is very likely to cause a panic with a large number of digits, or
/// in debug mode. The big-integer arithmetic without the `alloc` feature
/// assumes a maximum, fixed-width input, which assumes at maximum a
/// value of `10^(769 + 342)`, or ~4000 bits of storage. Passing in
/// nonsensical digits may require up to ~6000 bits of storage, which will
/// panic when attempting to add it to the big integer. It is therefore
/// up to the caller to validate this input.
///
/// We cannot efficiently remove trailing zeros while only accepting a
/// forward iterator.
pub fn parse_float<'a, F, Iter1, Iter2>(integer: Iter1, fraction: Iter2, exponent: i32) -> F
where
F: Float,
Iter1: Iterator<Item = &'a u8> + Clone,
Iter2: Iterator<Item = &'a u8> + Clone,
{
// Parse the mantissa and attempt the fast and moderate-path algorithms.
let num = parse_number(integer.clone(), fraction.clone(), exponent);
// Try the fast-path algorithm.
if let Some(value) = num.try_fast_path() {
return value;
}
// Now try the moderate path algorithm.
let mut fp = moderate_path::<F>(&num);
if fp.exp < 0 {
// Undo the invalid extended float biasing.
fp.exp -= F::INVALID_FP;
fp = slow::<F, _, _>(num, fp, integer, fraction);
}
// Unable to correctly round the float using the fast or moderate algorithms.
// Fallback to a slower, but always correct algorithm. If we have
// lossy, we can't be here.
extended_to_float::<F>(fp)
}
/// Wrapper for different moderate-path algorithms.
/// A return exponent of `-1` indicates an invalid value.
#[inline]
pub fn moderate_path<F: Float>(num: &Number) -> ExtendedFloat {
#[cfg(not(feature = "compact"))]
return lemire::<F>(num);
#[cfg(feature = "compact")]
return bellerophon::<F>(num);
}
/// Convert usize into i32 without overflow.
///
/// This is needed to ensure when adjusting the exponent relative to
/// the mantissa we do not overflow for comically-long exponents.
#[inline]
fn into_i32(value: usize) -> i32 {
if value > i32::max_value() as usize {
i32::max_value()
} else {
value as i32
}
}
// Add digit to mantissa.
#[inline]
pub fn add_digit(value: u64, digit: u8) -> Option<u64> {
value.checked_mul(10)?.checked_add(digit as u64)
}