use std::{
    borrow::Cow,
    num::{ParseFloatError, ParseIntError},
    str::FromStr,
    time::Duration,
};

use rust_decimal::Decimal;

use crate::{
    sql::{
        duration::{
            SECONDS_PER_DAY, SECONDS_PER_HOUR, SECONDS_PER_MINUTE, SECONDS_PER_WEEK,
            SECONDS_PER_YEAR,
        },
        Number,
    },
    syn::{
        error::{bail, syntax_error, SyntaxError},
        lexer::Lexer,
        token::{t, Span, Token, TokenKind},
    },
};

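/// A parsed numeric value: either a plain number or a duration.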
pub enum Numeric {
    Number(Number),
    Duration(Duration),
}

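/// The kind of a lexed numeric token: the kind of number for numbers, or the
/// already-parsed value for durations.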
#[derive(Debug)]
pub enum NumericKind {
    Number(NumberKind),
    Duration(Duration),
}

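/// The kind of a lexed number token: integer, float, or decimal.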
#[derive(Debug)]
pub enum NumberKind {
    Integer,
    Float,
    Decimal,
}

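/// The unit suffix of a single duration segment, e.g. the `ms` in `10ms`.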
enum DurationSuffix {
    Nano,
    Micro,
    Milli,
    Second,
    Minute,
    Hour,
    Day,
    Week,
    Year,
}

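/// Strips `_` digit separators from a number string, allocating only when necessary.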
fn prepare_number_str(str: &str) -> Cow<str> {
    if str.contains('_') {
        Cow::Owned(str.chars().filter(|x| *x != '_').collect())
    } else {
        Cow::Borrowed(str)
    }
}

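/// Lexes a numeric token and returns only its kind.
///
/// Decides between a number and a duration by peeking at the byte that follows the
/// leading digits; a trailing `d` is taken as the start of the `dec` suffix when it is
/// followed by `e`, and as a day duration otherwise.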
pub fn numeric_kind(lexer: &mut Lexer, start: Token) -> Result<NumericKind, SyntaxError> {
    match start.kind {
        t!("-") | t!("+") => number_kind(lexer, start).map(NumericKind::Number),
        TokenKind::Digits => match lexer.reader.peek() {
            Some(b'n' | b's' | b'm' | b'h' | b'y' | b'w' | b'u') => {
                duration(lexer, start).map(NumericKind::Duration)
            }
            Some(b'd') => {
                if let Some(b'e') = lexer.reader.peek1() {
                    number_kind(lexer, start).map(NumericKind::Number)
                } else {
                    duration(lexer, start).map(NumericKind::Duration)
                }
            }
            Some(x) if !x.is_ascii() => duration(lexer, start).map(NumericKind::Duration),
            _ => number_kind(lexer, start).map(NumericKind::Number),
        },
        x => {
            bail!("Unexpected token `{x}`, expected a numeric value, either a duration or number", @start.span)
        }
    }
}

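/// Lexes a numeric token into its parsed value, deciding between a number and a
/// duration in the same way as [`numeric_kind`].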
pub fn numeric(lexer: &mut Lexer, start: Token) -> Result<Numeric, SyntaxError> {
    match start.kind {
        t!("-") | t!("+") => number(lexer, start).map(Numeric::Number),
        TokenKind::Digits => match lexer.reader.peek() {
            Some(b'n' | b's' | b'm' | b'h' | b'y' | b'w' | b'u') => {
                duration(lexer, start).map(Numeric::Duration)
            }
            Some(b'd') => {
                if lexer.reader.peek1() == Some(b'e') {
                    number(lexer, start).map(Numeric::Number)
                } else {
                    duration(lexer, start).map(Numeric::Duration)
                }
            }
            Some(x) if !x.is_ascii() => duration(lexer, start).map(Numeric::Duration),
            _ => number(lexer, start).map(Numeric::Number),
        },
        x => {
            bail!("Unexpected token `{x}`, expected a numeric value, either a duration or number", @start.span)
        }
    }
}

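/// Lexes the remainder of a number token and returns its kind without parsing the value.
///
/// Accepts an optional mantissa, an optional exponent, and an optional `f` (float) or
/// `dec` (decimal) suffix, and rejects any identifier-like character that follows.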
pub fn number_kind(lexer: &mut Lexer, start: Token) -> Result<NumberKind, SyntaxError> {
    let offset = start.span.offset as usize;
    match start.kind {
        t!("-") | t!("+") => {
            eat_digits1(lexer, offset)?;
        }
        TokenKind::Digits => {}
        x => bail!("Unexpected start token for integer: {x}", @start.span),
    }

    let mut kind = NumberKind::Integer;

    let before_mantissa = lexer.reader.offset();
    if lexer.reader.peek1().map(|x| x.is_ascii_digit()).unwrap_or(false) && lexer.eat(b'.') {
        eat_digits1(lexer, before_mantissa)?;
        kind = NumberKind::Float;
    }

    let before_exponent = lexer.reader.offset();
    if lexer.eat(b'e') || lexer.eat(b'E') {
        if !lexer.eat(b'-') {
            lexer.eat(b'+');
        }

        eat_digits1(lexer, before_exponent)?;
        kind = NumberKind::Float;
    }

    if lexer.eat(b'f') {
        kind = NumberKind::Float;
    } else if lexer.eat(b'd') {
        lexer.expect('e')?;
        lexer.expect('c')?;
        kind = NumberKind::Decimal;
    }

    if has_ident_after(lexer) {
        let char = lexer.reader.next().unwrap();
        let char = lexer.reader.convert_to_char(char)?;
        bail!("Invalid token, found unexpected character `{char}` after number token", @lexer.current_span())
    }
    Ok(kind)
}

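/// Lexes a number token and parses it into a [`Number`], using the detected kind to
/// choose between integer, float, and decimal parsing.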
pub fn number(lexer: &mut Lexer, start: Token) -> Result<Number, SyntaxError> {
    let kind = number_kind(lexer, start)?;
    let span = lexer.current_span();
    let number_str = prepare_number_str(lexer.span_str(span));
    match kind {
        NumberKind::Integer => number_str
            .parse()
            .map(Number::Int)
            .map_err(|e| syntax_error!("Failed to parse number: {e}", @lexer.current_span())),
        NumberKind::Float => {
            let number_str = number_str.trim_end_matches('f');
            number_str
                .parse()
                .map(Number::Float)
                .map_err(|e| syntax_error!("Failed to parse number: {e}", @lexer.current_span()))
        }
        NumberKind::Decimal => {
            let number_str = number_str.trim_end_matches("dec");
            let decimal = if number_str.contains(['e', 'E']) {
                Decimal::from_scientific(number_str).map_err(
                    |e| syntax_error!("Failed to parse decimal: {e}", @lexer.current_span()),
                )?
            } else {
                Decimal::from_str(number_str).map_err(
                    |e| syntax_error!("Failed to parse decimal: {e}", @lexer.current_span()),
                )?
            };
            Ok(Number::Decimal(decimal))
        }
    }
}

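/// Lexes an integer token and parses it into `I`, rejecting floating point syntax such
/// as a mantissa or exponent.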
pub fn integer<I>(lexer: &mut Lexer, start: Token) -> Result<I, SyntaxError>
where
    I: FromStr<Err = ParseIntError>,
{
    let offset = start.span.offset as usize;
    match start.kind {
        t!("-") | t!("+") => {
            eat_digits1(lexer, offset)?;
        }
        TokenKind::Digits => {}
        x => bail!("Unexpected token {x}, expected integer", @start.span),
    };

    if has_ident_after(lexer) {
        let char = lexer.reader.next().unwrap();
        let char = lexer.reader.convert_to_char(char)?;
        bail!("Invalid token, found unexpected character `{char}` after integer token", @lexer.current_span())
    }

    let last_offset = lexer.reader.offset();
    let peek = lexer.reader.peek();
    if peek == Some(b'.') {
        let is_mantissa = lexer.reader.peek1().map(|x| x.is_ascii_digit()).unwrap_or(false);
        if is_mantissa {
            let span = Span {
                offset: last_offset as u32,
                len: 1,
            };
            bail!("Unexpected character `.` starting float, only integers are allowed here", @span)
        }
    }

    if peek == Some(b'e') || peek == Some(b'E') {
        bail!("Unexpected character `{}`, only integers are allowed here", peek.unwrap() as char, @lexer.current_span())
    }

    let span = lexer.current_span();
    let str = prepare_number_str(lexer.span_str(span));
    str.parse().map_err(|e| syntax_error!("Invalid integer: {e}", @span))
}

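/// Lexes a floating point token, including an optional mantissa, exponent, and trailing
/// `f` suffix, and parses it into `I`.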
pub fn float<I>(lexer: &mut Lexer, start: Token) -> Result<I, SyntaxError>
where
    I: FromStr<Err = ParseFloatError>,
{
    let offset = start.span.offset as usize;
    match start.kind {
        t!("-") | t!("+") => {
            eat_digits1(lexer, offset)?;
        }
        TokenKind::Digits => {}
        x => bail!("Unexpected token {x}, expected floating point number", @start.span),
    };

    let before_mantissa = lexer.reader.offset();
    if lexer.eat(b'.') {
        eat_digits1(lexer, before_mantissa)?;
    }

    let before_exponent = lexer.reader.offset();
    if lexer.eat(b'e') || lexer.eat(b'E') {
        if !lexer.eat(b'-') {
            lexer.eat(b'+');
        }

        eat_digits1(lexer, before_exponent)?;
    }

    let number_span = lexer.current_span();

    lexer.eat(b'f');

    if has_ident_after(lexer) {
        let char = lexer.reader.next().unwrap();
        let char = lexer.reader.convert_to_char(char)?;
        bail!("Invalid token, found invalid character `{char}` after number token", @lexer.current_span())
    }

    let str = prepare_number_str(lexer.span_str(number_span));
    str.parse()
        .map_err(|e| syntax_error!("Invalid floating point number: {e}", @lexer.current_span()))
}

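/// Lexes a duration token made of one or more `<digits><suffix>` segments, e.g.
/// `1h30m`, summing the segments and erroring on overflow.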
pub fn duration(lexer: &mut Lexer, start: Token) -> Result<Duration, SyntaxError> {
    match start.kind {
        TokenKind::Digits => {}
        x => bail!("Unexpected token {x}, expected duration", @start.span),
    }

    let mut duration = Duration::ZERO;

    let mut number_span = start.span;
    loop {
        let suffix = lex_duration_suffix(lexer)?;

        let numeric_string = prepare_number_str(lexer.span_str(number_span));
        let numeric_value: u64 = numeric_string.parse().map_err(
            |e| syntax_error!("Invalid token, failed to parse duration digits: {e}", @lexer.current_span()),
        )?;

        let addition = match suffix {
            DurationSuffix::Nano => Duration::from_nanos(numeric_value),
            DurationSuffix::Micro => Duration::from_micros(numeric_value),
            DurationSuffix::Milli => Duration::from_millis(numeric_value),
            DurationSuffix::Second => Duration::from_secs(numeric_value),
            DurationSuffix::Minute => {
                let minutes = numeric_value.checked_mul(SECONDS_PER_MINUTE).ok_or_else(
                    || syntax_error!("Invalid duration, value overflowed maximum allowed value", @lexer.current_span()),
                )?;
                Duration::from_secs(minutes)
            }
            DurationSuffix::Hour => {
                let hours = numeric_value.checked_mul(SECONDS_PER_HOUR).ok_or_else(
                    || syntax_error!("Invalid duration, value overflowed maximum allowed value", @lexer.current_span()),
                )?;
                Duration::from_secs(hours)
            }
            DurationSuffix::Day => {
                let day = numeric_value.checked_mul(SECONDS_PER_DAY).ok_or_else(
                    || syntax_error!("Invalid duration, value overflowed maximum allowed value", @lexer.current_span()),
                )?;
                Duration::from_secs(day)
            }
            DurationSuffix::Week => {
                let week = numeric_value.checked_mul(SECONDS_PER_WEEK).ok_or_else(
                    || syntax_error!("Invalid duration, value overflowed maximum allowed value", @lexer.current_span()),
                )?;
                Duration::from_secs(week)
            }
            DurationSuffix::Year => {
                let year = numeric_value.checked_mul(SECONDS_PER_YEAR).ok_or_else(
                    || syntax_error!("Invalid duration, value overflowed maximum allowed value", @lexer.current_span()),
                )?;
                Duration::from_secs(year)
            }
        };

        duration = duration.checked_add(addition).ok_or_else(
            || syntax_error!("Invalid duration, value overflowed maximum allowed value", @lexer.current_span()),
        )?;

        match lexer.reader.peek() {
            Some(x) if x.is_ascii_digit() => {
                let before = lexer.reader.offset();
                eat_digits(lexer);
                number_span = lexer.span_since(before);
            }
            _ => break,
        }
    }

    Ok(duration)
}

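/// Lexes a single duration unit suffix, accepting `ns`, `us` (or `µs`), `ms`, `s`,
/// `m`, `h`, `d`, `w`, and `y`.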
fn lex_duration_suffix(lexer: &mut Lexer) -> Result<DurationSuffix, SyntaxError> {
    let suffix = match lexer.reader.next() {
        Some(b'n') => {
            lexer.expect('s')?;
            DurationSuffix::Nano
        }
        Some(b'u') => {
            lexer.expect('s')?;
            DurationSuffix::Micro
        }
        Some(b'm') => {
            if lexer.eat(b's') {
                DurationSuffix::Milli
            } else {
                DurationSuffix::Minute
            }
        }
        Some(b's') => DurationSuffix::Second,
        Some(b'h') => DurationSuffix::Hour,
        Some(b'd') => DurationSuffix::Day,
        Some(b'w') => DurationSuffix::Week,
        Some(b'y') => DurationSuffix::Year,
        Some(0xC2) => {
            if !lexer.eat(0xB5) {
                let char = lexer.reader.complete_char(0xC2)?;
                bail!("Invalid duration token, expected a duration suffix, found `{char}`", @lexer.current_span())
            }
            lexer.expect('s')?;
            DurationSuffix::Micro
        }
        Some(x) => {
            let char = lexer.reader.convert_to_char(x)?;
            bail!("Invalid duration token, expected a duration suffix, found `{char}`", @lexer.current_span())
        }
        None => {
            bail!("Unexpected end of file, expected a duration suffix", @lexer.current_span())
        }
    };

    if has_ident_after(lexer) {
        let char = lexer.reader.next().unwrap();
        let char = lexer.reader.convert_to_char(char)?;
        bail!("Invalid token, found invalid character `{char}` after duration suffix", @lexer.current_span())
    }

    Ok(suffix)
}

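/// Returns whether the next byte would continue an identifier, i.e. it is either an
/// ASCII alphabetic character or a non-ASCII byte.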
fn has_ident_after(lexer: &mut Lexer) -> bool {
    match lexer.reader.peek() {
        Some(x) => !x.is_ascii() || x.is_ascii_alphabetic(),
        None => false,
    }
}

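/// Eats one or more digits, erroring if the next byte is not an ASCII digit.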
fn eat_digits1(lexer: &mut Lexer, start: usize) -> Result<(), SyntaxError> {
    match lexer.reader.peek() {
        Some(x) if x.is_ascii_digit() => {}
        Some(x) => {
            let char = lexer.reader.convert_to_char(x)?;
            bail!("Invalid number token, expected a digit, found: {char}", @lexer.span_since(start));
        }
        None => {
            bail!("Unexpected end of file, expected a number token digit", @lexer.span_since(start));
        }
    }

    eat_digits(lexer);
    Ok(())
}

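/// Eats any number of ASCII digits and `_` digit separators.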
fn eat_digits(lexer: &mut Lexer) {
    while lexer.eat_when(|x| x.is_ascii_digit() || x == b'_') {}
}