malachite_base/num/conversion/string/
from_sci_string.rs

1// Copyright © 2025 Mikhail Hogrefe
2//
3// This file is part of Malachite.
4//
5// Malachite is free software: you can redistribute it and/or modify it under the terms of the GNU
6// Lesser General Public License (LGPL) as published by the Free Software Foundation; either version
7// 3 of the License, or (at your option) any later version. See <https://www.gnu.org/licenses/>.
8
9use crate::num::arithmetic::traits::Parity;
10use crate::num::basic::integers::PrimitiveInt;
11use crate::num::conversion::string::from_string::digit_from_display_byte;
12use crate::num::conversion::string::options::FromSciStringOptions;
13use crate::num::conversion::traits::FromSciString;
14use crate::rounding_modes::RoundingMode::*;
15use alloc::vec::Vec;
16use core::cmp::Ordering::{self, *};
17use core::str::FromStr;
18
19#[doc(hidden)]
20pub fn parse_exponent(s: &[u8]) -> Option<i64> {
21    i64::from_str(core::str::from_utf8(s).ok()?).ok()
22}
23
24#[doc(hidden)]
25pub fn validate_helper(s: &[u8], base: u8) -> Option<()> {
26    for &c in s {
27        if digit_from_display_byte(c)? >= base {
28            return None;
29        }
30    }
31    Some(())
32}
33
34#[doc(hidden)]
35pub fn is_zero_helper(s: &[u8], base: u8) -> Option<bool> {
36    let mut all_zeros = true;
37    for &c in s {
38        let d = digit_from_display_byte(c)?;
39        if d >= base {
40            return None;
41        }
42        if d != 0 {
43            all_zeros = false;
44        }
45    }
46    Some(all_zeros)
47}
48
49#[doc(hidden)]
50pub fn cmp_half_helper(s: &[u8], base: u8) -> Option<Ordering> {
51    if s.is_empty() {
52        return Some(Less);
53    }
54    let h = base >> 1;
55    let mut done = false;
56    let mut result;
57    if base.even() {
58        // 1/2 is 0.h
59        result = Equal;
60        let mut first = true;
61        for &c in s {
62            let d = digit_from_display_byte(c)?;
63            if d >= base {
64                return None;
65            }
66            if done {
67                continue;
68            }
69            if first {
70                let half_c = d.cmp(&h);
71                if half_c != Equal {
72                    result = half_c;
73                    done = true;
74                }
75                first = false;
76            } else if d != 0 {
77                result = Greater;
78                done = true;
79            }
80        }
81    } else {
82        // 1/2 is 0.hhh...
83        result = Less;
84        for &c in s {
85            let d = digit_from_display_byte(c)?;
86            if done {
87                continue;
88            }
89            let half_c = d.cmp(&h);
90            if half_c != Equal {
91                result = half_c;
92                done = true;
93            }
94        }
95    }
96    Some(result)
97}
98
99fn parse_int<T: PrimitiveInt>(cs: &[u8], base: u8) -> Option<T> {
100    // if T is unsigned, from_string_base won't handle -0
101    let mut test_neg_zero = false;
102    if T::MIN == T::ZERO {
103        if let Some(&b'-') = cs.first() {
104            test_neg_zero = true;
105        }
106    }
107    if test_neg_zero {
108        if cs.len() == 1 {
109            return None;
110        }
111        for &c in &cs[1..] {
112            if c != b'0' {
113                return None;
114            }
115        }
116        Some(T::ZERO)
117    } else {
118        T::from_string_base(base, core::str::from_utf8(cs).ok()?)
119    }
120}
121
122fn up_1<T: PrimitiveInt>(x: T, neg: bool) -> Option<T> {
123    if neg {
124        x.checked_sub(T::ONE)
125    } else {
126        x.checked_add(T::ONE)
127    }
128}
129
130#[doc(hidden)]
131pub fn preprocess_sci_string(s: &str, options: FromSciStringOptions) -> Option<(Vec<u8>, i64)> {
132    let mut s = s.as_bytes().to_vec();
133    let mut exponent = 0;
134    if options.base < 15 {
135        for (i, &c) in s.iter().enumerate().rev() {
136            if c == b'e' || c == b'E' {
137                if i == 0 || i == s.len() - 1 {
138                    return None;
139                }
140                exponent = parse_exponent(&s[i + 1..])?;
141                s.truncate(i);
142                break;
143            }
144        }
145    } else {
146        for (i, &c) in s.iter().enumerate().rev() {
147            if c == b'+' || c == b'-' {
148                if i == 0 {
149                    break;
150                }
151                if i == 1 || i == s.len() - 1 {
152                    return None;
153                }
154                let exp_indicator = s[i - 1];
155                if exp_indicator != b'e' && exp_indicator != b'E' {
156                    return None;
157                }
158                exponent = parse_exponent(&s[i..])?;
159                s.truncate(i - 1);
160                break;
161            }
162        }
163    }
164    let mut point_index = None;
165    for (i, &c) in s.iter().enumerate() {
166        if c == b'.' {
167            point_index = Some(i);
168            break;
169        }
170    }
171    if let Some(point_index) = point_index {
172        let len = s.len();
173        if point_index != len - 1 {
174            let next_char = s[point_index + 1];
175            if next_char == b'+' || next_char == b'-' {
176                return None;
177            }
178            exponent = exponent.checked_sub(i64::try_from(len - point_index - 1).ok()?)?;
179            s.copy_within(point_index + 1..len, point_index);
180        }
181        s.pop();
182    }
183    Some((s, exponent))
184}
185
186fn from_sci_string_with_options_primitive_int<T: PrimitiveInt>(
187    s: &str,
188    options: FromSciStringOptions,
189) -> Option<T> {
190    let (s, exponent) = preprocess_sci_string(s, options)?;
191    if exponent >= 0 {
192        let x = parse_int::<T>(&s, options.base)?;
193        x.checked_mul(T::wrapping_from(options.base).checked_pow(exponent.unsigned_abs())?)
194    } else {
195        let neg_exponent = usize::try_from(exponent.unsigned_abs()).ok()?;
196        let len = s.len();
197        if len == 0 {
198            return None;
199        }
200        let first = s[0];
201        let neg = first == b'-';
202        let sign = neg || first == b'+';
203        let rm = if neg {
204            -options.rounding_mode
205        } else {
206            options.rounding_mode
207        };
208        let sig_len = if sign { len - 1 } else { len };
209        if sig_len == 0 {
210            return None;
211        }
212        if neg_exponent > sig_len {
213            let s = if sign { &s[1..] } else { &s[..] };
214            return match rm {
215                Down | Floor | Nearest => {
216                    validate_helper(s, options.base)?;
217                    Some(T::ZERO)
218                }
219                Up | Ceiling => {
220                    if is_zero_helper(s, options.base)? {
221                        Some(T::ZERO)
222                    } else {
223                        up_1(T::ZERO, neg)
224                    }
225                }
226                Exact => None,
227            };
228        }
229        let (before_e, after_e) = s.split_at(len - neg_exponent);
230        let x = match before_e {
231            &[] | &[b'-'] | &[b'+'] => T::ZERO,
232            before_e => parse_int(before_e, options.base)?,
233        };
234        if after_e.is_empty() {
235            return Some(x);
236        }
237        match rm {
238            Down | Floor => {
239                validate_helper(after_e, options.base)?;
240                Some(x)
241            }
242            Up | Ceiling => {
243                if is_zero_helper(after_e, options.base)? {
244                    Some(x)
245                } else {
246                    up_1(x, neg)
247                }
248            }
249            Exact => {
250                if is_zero_helper(after_e, options.base)? {
251                    Some(x)
252                } else {
253                    None
254                }
255            }
256            Nearest => match cmp_half_helper(after_e, options.base)? {
257                Less => Some(x),
258                Greater => up_1(x, neg),
259                Equal => {
260                    if x.even() {
261                        Some(x)
262                    } else {
263                        up_1(x, neg)
264                    }
265                }
266            },
267        }
268    }
269}
270
271macro_rules! impl_from_sci_string {
272    ($t:ident) => {
273        impl FromSciString for $t {
274            /// Converts a [`String`], possibly in scientfic notation, to a primitive integer.
275            ///
276            /// Use [`FromSciStringOptions`] to specify the base (from 2 to 36, inclusive) and the
277            /// rounding mode, in case rounding is necessary because the string represents a
278            /// non-integer.
279            ///
280            /// If the base is greater than 10, the higher digits are represented by the letters
281            /// `'a'` through `'z'` or `'A'` through `'Z'`; the case doesn't matter and doesn't need
282            /// to be consistent.
283            ///
284            /// Exponents are allowed, and are indicated using the character `'e'` or `'E'`. If the
285            /// base is 15 or greater, an ambiguity arises where it may not be clear whether `'e'`
286            /// is a digit or an exponent indicator. To resolve this ambiguity, always use a `'+'`
287            /// or `'-'` sign after the exponent indicator when the base is 15 or greater.
288            ///
289            /// The exponent itself is always parsed using base 10.
290            ///
291            /// Decimal (or other-base) points are allowed. These are most useful in conjunction
292            /// with exponents, but they may be used on their own. If the string represents a
293            /// non-integer, the rounding mode specified in `options` is used to round to an
294            /// integer.
295            ///
296            /// If the string is unparseable or parses to an out-of-range integer, `None` is
297            /// returned. `None` is also returned if the rounding mode in options is `Exact`, but
298            /// rounding is necessary.
299            ///
300            /// # Worst-case complexity
301            /// $T(n) = O(n)$
302            ///
303            /// $M(n) = O(1)$
304            ///
305            /// where $T$ is time, $M$ is additional memory, and $n$ is `s.len()`.
306            ///
307            /// # Examples
308            /// See [here](super::from_sci_string).
309            #[inline]
310            fn from_sci_string_with_options(s: &str, options: FromSciStringOptions) -> Option<$t> {
311                from_sci_string_with_options_primitive_int(s, options)
312            }
313        }
314    };
315}
316apply_to_primitive_ints!(impl_from_sci_string);