1mod arch;
2mod common;
3mod decimal;
4mod float;
5mod lemire;
6mod slow;
7mod table;
8
9use self::{common::BiasedFp, float::RawFloat, table::POWER_OF_FIVE_128};
10use crate::arch::simd_str2int;
11
12const FLOATING_LONGEST_DIGITS: usize = 17;
13const F64_BITS: u32 = 64;
14const F64_SIG_BITS: u32 = 52;
15const F64_SIG_FULL_BITS: u32 = 53;
16const F64_EXP_BIAS: i32 = 1023;
17const F64_SIG_MASK: u64 = 0x000F_FFFF_FFFF_FFFF;
18
19#[derive(Debug)]
20pub enum ParserNumber {
21 Unsigned(u64),
22 Signed(i64),
24 Float(f64),
26}
27
28#[derive(Debug)]
29pub enum Error {
30 InvalidNumber,
31 FloatMustBeFinite,
32}
33
34macro_rules! match_digit {
35 ($data:expr, $i:expr, $pattern:pat) => {
36 $i < $data.len() && matches!($data[$i], $pattern)
37 };
38}
39
40macro_rules! is_digit {
41 ($data:expr, $i:expr) => {
42 $i < $data.len() && $data[$i].is_ascii_digit()
43 };
44}
45
46macro_rules! digit {
47 ($data:expr, $i:expr) => {
48 ($data[$i] - b'0') as u64
49 };
50}
51
52macro_rules! check_digit {
53 ($data:expr, $i:expr) => {
54 if !($i < $data.len() && $data[$i].is_ascii_digit()) {
55 return Err(Error::InvalidNumber);
56 }
57 };
58}
59
60#[inline(always)]
61fn parse_exponent(data: &[u8], index: &mut usize) -> Result<i32, Error> {
62 let mut exponent: i32 = 0;
63 let mut negative = false;
64
65 if *index >= data.len() {
66 return Err(Error::InvalidNumber);
67 }
68
69 match data[*index] {
70 b'+' => *index += 1,
71 b'-' => {
72 negative = true;
73 *index += 1;
74 }
75 _ => {}
76 }
77
78 check_digit!(data, *index);
79 while exponent < 1000 && is_digit!(data, *index) {
80 exponent = digit!(data, *index) as i32 + exponent * 10;
81 *index += 1;
82 }
83 while is_digit!(data, *index) {
84 *index += 1;
85 }
86 if negative {
87 exponent = -exponent;
88 }
89 Ok(exponent)
90}
91
92const POW10_UINT: [u64; 18] = [
93 1,
94 10,
95 100,
96 1000,
97 10000,
98 100000,
99 1000000,
100 10000000,
101 100000000,
102 1000000000,
103 10000000000,
104 100000000000,
105 1000000000000,
106 10000000000000,
107 100000000000000,
108 1000000000000000,
109 10000000000000000,
110 100000000000000000,
111];
112
113#[inline(always)]
117fn parse_number_fraction(
118 data: &[u8],
119 index: &mut usize,
120 significant: &mut u64,
121 exponent: &mut i32,
122 mut need: isize,
123 dot_pos: usize,
124) -> Result<bool, Error> {
125 debug_assert!(need < FLOATING_LONGEST_DIGITS as isize);
126
127 if need > 0 {
134 if data.len() - *index >= 16 {
135 let (frac, ndigits) = unsafe { simd_str2int(&data[*index..], need as usize) };
136 *significant = *significant * POW10_UINT[ndigits] + frac;
137 *index += ndigits;
138 } else {
139 while need > 0 && is_digit!(data, *index) {
140 *significant = *significant * 10 + digit!(data, *index);
141 *index += 1;
142 need -= 1;
143 }
144 }
145 }
146
147 *exponent -= *index as i32 - dot_pos as i32;
148 let mut trunc = false;
149 while is_digit!(data, *index) {
150 trunc = true;
151 *index += 1;
152 }
153
154 if match_digit!(data, *index, b'e' | b'E') {
155 *index += 1;
156 *exponent += parse_exponent(data, &mut *index)?;
157 }
158 Ok(trunc)
159}
160
161#[inline(always)]
162pub fn parse_number(data: &[u8], index: &mut usize, negative: bool) -> Result<ParserNumber, Error> {
163 let mut significant: u64 = 0;
164 let mut exponent: i32 = 0;
165 let mut trunc = false;
166 let raw_num = &data[*index..];
167
168 if match_digit!(data, *index, b'0') {
169 *index += 1;
170
171 if *index >= data.len() || !matches!(data[*index], b'.' | b'e' | b'E') {
172 if negative {
174 return Ok(ParserNumber::Float(0.0));
175 }
176 return Ok(ParserNumber::Unsigned(0));
177 }
178
179 match data[*index] {
181 b'.' => {
182 *index += 1;
183 let dot_pos = *index;
184 check_digit!(data, *index);
185 while match_digit!(data, *index, b'0') {
186 *index += 1;
187 }
188 if match_digit!(data, *index, b'e' | b'E') {
190 *index += 1;
191 if match_digit!(data, *index, b'-' | b'+') {
192 *index += 1;
193 }
194 check_digit!(data, *index);
195 while is_digit!(data, *index) {
196 *index += 1;
197 }
198 return Ok(ParserNumber::Float(0.0));
199 }
200
201 if !is_digit!(data, *index) {
206 return Ok(ParserNumber::Float(0.0));
207 }
208
209 significant = digit!(data, *index);
210 *index += 1;
211
212 if is_digit!(data, *index) {
213 let need = FLOATING_LONGEST_DIGITS as isize - 1;
214 trunc = parse_number_fraction(
215 data,
216 index,
217 &mut significant,
218 &mut exponent,
219 need,
220 dot_pos,
221 )?;
222 } else {
223 exponent -= *index as i32 - dot_pos as i32;
224 if match_digit!(data, *index, b'e' | b'E') {
225 *index += 1;
226 exponent += parse_exponent(data, &mut *index)?;
227 }
228 }
229 }
230 b'e' | b'E' => {
231 *index += 1;
232 if match_digit!(data, *index, b'-' | b'+') {
233 *index += 1;
234 }
235 check_digit!(data, *index);
236 while is_digit!(data, *index) {
237 *index += 1;
238 }
239 return Ok(ParserNumber::Float(0.0));
240 }
241 _ => unreachable!("unreachable branch in parse_number_unchecked"),
242 }
243 } else {
244 let digit_start = *index;
246 while is_digit!(data, *index) {
247 significant = significant
250 .wrapping_mul(10)
251 .wrapping_add(digit!(data, *index));
252 *index += 1;
253 }
254 let mut digits_cnt = *index - digit_start;
255 if digits_cnt == 0 {
256 return Err(Error::InvalidNumber);
257 }
258
259 if digits_cnt > 19 {
261 *index = digit_start;
262 significant = 0;
263 digits_cnt = 0;
264 while is_digit!(data, *index) && digits_cnt < 19 {
265 significant = significant * 10 + digit!(data, *index);
266 digits_cnt += 1;
267 *index += 1;
268 }
269
270 while is_digit!(data, *index) {
272 exponent += 1;
273 *index += 1;
274 trunc = true;
275 }
276 }
277
278 if match_digit!(data, *index, b'e' | b'E') {
281 *index += 1;
283 exponent += parse_exponent(data, index)?;
284 } else if match_digit!(data, *index, b'.') {
285 *index += 1;
286 check_digit!(data, *index);
287 let dot_pos = *index;
288
289 let need = FLOATING_LONGEST_DIGITS as isize - digits_cnt as isize;
291 trunc =
292 parse_number_fraction(data, index, &mut significant, &mut exponent, need, dot_pos)?;
293 } else {
294 if exponent == 0 {
296 if negative {
297 if significant > (1u64 << 63) {
298 return Ok(ParserNumber::Float(-(significant as f64)));
299 } else {
300 return Ok(ParserNumber::Signed(0_i64.wrapping_sub(significant as i64)));
303 }
304 } else {
305 return Ok(ParserNumber::Unsigned(significant));
306 }
307 } else if exponent == 1 {
308 let last = digit!(data, *index - 1);
310 let (out, ov0) = significant.overflowing_mul(10);
311 let (out, ov1) = out.overflowing_add(last);
312 if !ov0 && !ov1 {
313 significant = out;
315 if negative {
316 return Ok(ParserNumber::Float(-(significant as f64)));
317 } else {
318 return Ok(ParserNumber::Unsigned(significant));
319 }
320 }
321 }
322 trunc = true;
323 }
324 }
325
326 parse_float(significant, exponent, negative, trunc, raw_num)
328}
329
330#[inline(always)]
331fn parse_float(
332 significant: u64,
333 exponent: i32,
334 negative: bool,
335 trunc: bool,
336 raw_num: &[u8],
337) -> Result<ParserNumber, Error> {
338 if significant >> 52 == 0 && (-22..=(22 + 15)).contains(&exponent) {
340 if let Some(mut float) = parse_float_fast(exponent, significant) {
341 if negative {
342 float = -float;
343 }
344 return Ok(ParserNumber::Float(float));
345 }
346 }
347
348 if !trunc && exponent > (-308 + 1) && exponent < (308 - 20) {
349 if let Some(raw) = parse_floating_normal_fast(exponent, significant) {
350 let mut float = f64::from_u64_bits(raw);
351 if negative {
352 float = -float;
353 }
354 return Ok(ParserNumber::Float(float));
355 }
356 }
357
358 let exponent = exponent as i64;
363 let mut fp = lemire::compute_float::<f64>(exponent, significant);
364 if trunc && fp.e >= 0 && fp != lemire::compute_float::<f64>(exponent, significant + 1) {
365 fp.e = -1;
366 }
367
368 if fp.e < 0 {
371 fp = slow::parse_long_mantissa::<f64>(raw_num);
372 }
373
374 let mut float = biased_fp_to_float::<f64>(fp);
375 if negative {
376 float = -float;
377 }
378
379 if float.is_infinite() {
381 return Err(Error::FloatMustBeFinite);
382 }
383 Ok(ParserNumber::Float(float))
384}
385
386#[inline(always)]
388fn parse_floating_normal_fast(exp10: i32, man: u64) -> Option<u64> {
389 let (mut hi, lo, hi2, add, bits);
390 let mut exp2: i32;
391 let mut exact = false;
392 let idx = exp10 + 342;
393 let sig2_ext = POWER_OF_FIVE_128[idx as usize].1;
394 let sig2 = POWER_OF_FIVE_128[idx as usize].0;
395
396 let mut lz = man.leading_zeros();
397 let sig1 = man << lz;
398 exp2 = ((217706 * exp10 - 4128768) >> 16) - lz as i32;
399
400 (lo, hi) = lemire::full_multiplication(sig1, sig2);
401
402 bits = hi & ((1u64 << (64 - 54 - 1)) - 1);
403 if bits.wrapping_sub(1) < ((1u64 << (64 - 54 - 1)) - 2) {
404 exact = true;
405 } else {
406 (_, hi2) = lemire::full_multiplication(sig1, sig2_ext);
407 add = lo.wrapping_add(hi2);
409 if add + 1 > 1u64 {
410 let carry = add < lo || add < hi2;
411 hi += carry as u64;
412 exact = true;
413 }
414 }
415
416 if exact {
417 lz = if hi < (1u64 << 63) { 1 } else { 0 };
418 hi <<= lz;
419 exp2 -= lz as i32;
420 exp2 += 64;
421
422 let round_up = (hi & (1u64 << (64 - 54))) > 0;
423 hi = hi.wrapping_add(if round_up { 1u64 << (64 - 54) } else { 0 });
424
425 if hi < (1u64 << (64 - 54)) {
426 hi = 1u64 << 63;
427 exp2 += 1;
428 }
429
430 hi >>= F64_BITS - F64_SIG_FULL_BITS;
431 exp2 += F64_BITS as i32 - F64_SIG_FULL_BITS as i32 + F64_SIG_BITS as i32;
432 exp2 += F64_EXP_BIAS;
433 let raw = ((exp2 as u64) << F64_SIG_BITS) | (hi & F64_SIG_MASK);
434 return Some(raw);
435 }
436 None
437}
438
439#[inline(always)]
440fn biased_fp_to_float<T: RawFloat>(x: BiasedFp) -> T {
442 let mut word = x.f;
443 word |= (x.e as u64) << T::MANTISSA_EXPLICIT_BITS;
444 T::from_u64_bits(word)
445}
446
447#[inline(always)]
448fn parse_float_fast(exp10: i32, significant: u64) -> Option<f64> {
449 let mut d = significant as f64;
450 if exp10 > 0 {
451 if exp10 > 22 {
452 d *= POW10_FLOAT[exp10 as usize - 22];
453 if (-1e15..=1e15).contains(&d) {
454 Some(d * POW10_FLOAT[22])
455 } else {
456 None
457 }
458 } else {
459 Some(d * POW10_FLOAT[exp10 as usize])
460 }
461 } else {
462 Some(d / POW10_FLOAT[(-exp10) as usize])
463 }
464}
465
466const POW10_FLOAT: [f64; 23] = [
467 1e-000, 1e+001,
468 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010, 1e+011, 1e+012, 1e+013,
469 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019, 1e+020, 1e+021,
470 1e+022, ];
472
473#[cfg(test)]
474mod test {
475 use crate::{parse_number, ParserNumber};
476
477 fn test_parse_ok(input: &str, expect: f64) {
478 assert_eq!(input.parse::<f64>().unwrap(), expect);
479
480 let mut data = input.as_bytes().to_vec();
481 data.push(b' ');
482 let mut index = 0;
483 let num = parse_number(&data, &mut index, false).unwrap();
484 assert!(
485 matches!(num, ParserNumber::Float(f) if f == expect),
486 "parsed is {:?} failed num is {}",
487 num,
488 input
489 );
490 assert_eq!(data[index], b' ', "failed num is {}", input);
491 }
492
493 #[test]
494 fn test_parse_float() {
495 test_parse_ok("0.0", 0.0);
496 test_parse_ok("0.01", 0.01);
497 test_parse_ok("0.1", 0.1);
498 test_parse_ok("0.12", 0.12);
499 test_parse_ok("0.123", 0.123);
500 test_parse_ok("0.1234", 0.1234);
501 test_parse_ok("0.12345", 0.12345);
502 test_parse_ok("0.123456", 0.123456);
503 test_parse_ok("0.1234567", 0.1234567);
504 test_parse_ok("0.12345678", 0.12345678);
505 test_parse_ok("0.123456789", 0.123456789);
506 test_parse_ok("0.1234567890", 0.1234567890);
507 test_parse_ok("0.10000000149011612", 0.10000000149011612);
508 test_parse_ok("0.06411743306171047", 0.06411743306171047);
509
510 test_parse_ok("0e-1", 0e-1);
511 test_parse_ok("0e+1000000", 0e+1000000);
512 test_parse_ok("0.001e-1", 0.001e-1);
513 test_parse_ok("0.001e+123", 0.001e+123);
514 test_parse_ok(
515 "0.000000000000000000000000001e+123",
516 0.000000000000000000000000001e+123,
517 );
518
519 test_parse_ok("1.0", 1.0);
520 test_parse_ok("1350.0", 1350.0);
521 test_parse_ok("1.10000000149011612", 1.1000000014901161);
522
523 test_parse_ok("1e0", 1e0);
524 test_parse_ok("1.0e0", 1.0e0);
525 test_parse_ok("1.0e+0", 1.0e+0);
526 test_parse_ok("1.001e-123", 1.001e-123);
527 test_parse_ok("10000000149011610000.0e-123", 1.000_000_014_901_161e-104);
528 test_parse_ok(
529 "10000000149011612123.001e-123",
530 1.000_000_014_901_161_2e-104,
531 );
532 test_parse_ok("33333333333333333333", 3.333333333333333e19);
533 test_parse_ok("135e-12", 135e-12);
534
535 test_parse_ok("12448139190673828122020e-47", 1.244813919067383e-25);
537 test_parse_ok(
538 "3469446951536141862700000000000000000e-62",
539 3.469446951536142e-26,
540 );
541 }
542}