surrealdb_core/syn/lexer/compound/number.rs

use std::{
	borrow::Cow,
	num::{ParseFloatError, ParseIntError},
	str::FromStr,
	time::Duration,
};

use rust_decimal::Decimal;

use crate::{
	sql::{
		duration::{
			SECONDS_PER_DAY, SECONDS_PER_HOUR, SECONDS_PER_MINUTE, SECONDS_PER_WEEK,
			SECONDS_PER_YEAR,
		},
		Number,
	},
	syn::{
		error::{bail, syntax_error, SyntaxError},
		lexer::Lexer,
		token::{t, Span, Token, TokenKind},
	},
};

pub enum Numeric {
	Number(Number),
	Duration(Duration),
}

/// Like [`Numeric`] but holds the kind of number instead of the parsed value.
#[derive(Debug)]
pub enum NumericKind {
	Number(NumberKind),
	Duration(Duration),
}

#[derive(Debug)]
pub enum NumberKind {
	Integer,
	Float,
	Decimal,
}

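/// The recognized duration suffixes, from nanoseconds up to years.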
enum DurationSuffix {
	Nano,
	Micro,
	Milli,
	Second,
	Minute,
	Hour,
	Day,
	Week,
	Year,
}

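/// Strips `_` digit separators from a number string, allocating a new string only when needed.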
fn prepare_number_str(str: &str) -> Cow<str> {
	if str.contains('_') {
		Cow::Owned(str.chars().filter(|x| *x != '_').collect())
	} else {
		Cow::Borrowed(str)
	}
}

/// Lexes tokens which can start with digits: a number or a duration.
/// Like [`numeric`] but returns the kind of number instead of the parsed number value.
pub fn numeric_kind(lexer: &mut Lexer, start: Token) -> Result<NumericKind, SyntaxError> {
	match start.kind {
		t!("-") | t!("+") => number_kind(lexer, start).map(NumericKind::Number),
		TokenKind::Digits => match lexer.reader.peek() {
			Some(b'n' | b's' | b'm' | b'h' | b'y' | b'w' | b'u') => {
				duration(lexer, start).map(NumericKind::Duration)
			}
			Some(b'd') => {
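				// `d` is ambiguous: `dec` marks a decimal number, while a bare `d` is the
				// days duration suffix, so peek one byte further to tell them apart.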
				if let Some(b'e') = lexer.reader.peek1() {
					number_kind(lexer, start).map(NumericKind::Number)
				} else {
					duration(lexer, start).map(NumericKind::Duration)
				}
			}
			Some(x) if !x.is_ascii() => duration(lexer, start).map(NumericKind::Duration),
			_ => number_kind(lexer, start).map(NumericKind::Number),
		},
		x => {
			bail!("Unexpected token `{x}`, expected a numeric value, either a duration or number",@start.span)
		}
	}
}

/// Lexes and parses tokens which can start with digits: a number or a duration.
pub fn numeric(lexer: &mut Lexer, start: Token) -> Result<Numeric, SyntaxError> {
	match start.kind {
		t!("-") | t!("+") => number(lexer, start).map(Numeric::Number),
		TokenKind::Digits => match lexer.reader.peek() {
			Some(b'n' | b's' | b'm' | b'h' | b'y' | b'w' | b'u') => {
				duration(lexer, start).map(Numeric::Duration)
			}
			Some(b'd') => {
				if lexer.reader.peek1() == Some(b'e') {
					number(lexer, start).map(Numeric::Number)
				} else {
					duration(lexer, start).map(Numeric::Duration)
				}
			}
			Some(x) if !x.is_ascii() => duration(lexer, start).map(Numeric::Duration),
			_ => number(lexer, start).map(Numeric::Number),
		},
		x => {
			bail!("Unexpected token `{x}`, expected a numeric value, either a duration or number",@start.span)
		}
	}
}

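/// Lexes the remainder of a number token and determines whether it is an integer, float, or decimal.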
pub fn number_kind(lexer: &mut Lexer, start: Token) -> Result<NumberKind, SyntaxError> {
	let offset = start.span.offset as usize;
	match start.kind {
		t!("-") | t!("+") => {
			eat_digits1(lexer, offset)?;
		}
		TokenKind::Digits => {}
		x => bail!("Unexpected start token for integer: {x}",@start.span),
	}

	let mut kind = NumberKind::Integer;

	let before_mantissa = lexer.reader.offset();
	// Only consume the `.` if a digit follows, so `1..` and `1.foo` are not lexed as floats.
	if lexer.reader.peek1().map(|x| x.is_ascii_digit()).unwrap_or(false) && lexer.eat(b'.') {
		eat_digits1(lexer, before_mantissa)?;
		kind = NumberKind::Float;
	}

	let before_exponent = lexer.reader.offset();
	if lexer.eat(b'e') || lexer.eat(b'E') {
		if !lexer.eat(b'-') {
			lexer.eat(b'+');
		}

		eat_digits1(lexer, before_exponent)?;
		kind = NumberKind::Float;
	}

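	// An optional suffix settles the kind: `f` forces a float and `dec` forces a decimal.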
	if !lexer.eat(b'f') {
		if lexer.eat(b'd') {
			lexer.expect('e')?;
			lexer.expect('c')?;
			kind = NumberKind::Decimal;
		}
	} else {
		kind = NumberKind::Float;
	}

	if has_ident_after(lexer) {
		let char = lexer.reader.next().unwrap();
		let char = lexer.reader.convert_to_char(char)?;
		bail!("Invalid token, found unexpected character `{char}` after number token", @lexer.current_span())
	}
	Ok(kind)
}

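/// Lexes the remainder of a number token and parses it into a [`Number`] of the detected kind.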
pub fn number(lexer: &mut Lexer, start: Token) -> Result<Number, SyntaxError> {
	let kind = number_kind(lexer, start)?;
	let span = lexer.current_span();
	let number_str = prepare_number_str(lexer.span_str(span));
	match kind {
		NumberKind::Integer => number_str
			.parse()
			.map(Number::Int)
			.map_err(|e| syntax_error!("Failed to parse number: {e}", @lexer.current_span())),
		NumberKind::Float => {
			let number_str = number_str.trim_end_matches('f');
			number_str
				.parse()
				.map(Number::Float)
				.map_err(|e| syntax_error!("Failed to parse number: {e}", @lexer.current_span()))
		}
		NumberKind::Decimal => {
			let number_str = number_str.trim_end_matches("dec");
			let decimal = if number_str.contains(['e', 'E']) {
				Decimal::from_scientific(number_str).map_err(
					|e| syntax_error!("Failed to parse decimal: {e}", @lexer.current_span()),
				)?
			} else {
				Decimal::from_str(number_str).map_err(
					|e| syntax_error!("Failed to parse decimal: {e}", @lexer.current_span()),
				)?
			};
			Ok(Number::Decimal(decimal))
		}
	}
}

/// Generic integer parsing method,
/// works for all unsigned integers.
pub fn integer<I>(lexer: &mut Lexer, start: Token) -> Result<I, SyntaxError>
where
	I: FromStr<Err = ParseIntError>,
{
	let offset = start.span.offset as usize;
	match start.kind {
		t!("-") | t!("+") => {
			eat_digits1(lexer, offset)?;
		}
		TokenKind::Digits => {}
		x => bail!("Unexpected token {x}, expected integer",@start.span),
	};

	if has_ident_after(lexer) {
		let char = lexer.reader.next().unwrap();
		let char = lexer.reader.convert_to_char(char)?;
		bail!("Invalid token, found unexpected character `{char}` after integer token", @lexer.current_span())
	}

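	// Explicitly reject float syntax (a fractional part or an exponent) with a targeted error.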
	let last_offset = lexer.reader.offset();
	let peek = lexer.reader.peek();
	if peek == Some(b'.') {
		let is_mantissa = lexer.reader.peek1().map(|x| x.is_ascii_digit()).unwrap_or(false);
		if is_mantissa {
			let span = Span {
				offset: last_offset as u32,
				len: 1,
			};
			bail!("Unexpected character `.` starting float, only integers are allowed here", @span)
		}
	}

	if peek == Some(b'e') || peek == Some(b'E') {
		bail!("Unexpected character `{}`, only integers are allowed here", peek.unwrap() as char, @lexer.current_span())
	}

	let span = lexer.current_span();
	let str = prepare_number_str(lexer.span_str(span));
	str.parse().map_err(|e| syntax_error!("Invalid integer: {e}", @span))
}

/// Generic floating point parsing method,
/// works for all floating point types.
pub fn float<I>(lexer: &mut Lexer, start: Token) -> Result<I, SyntaxError>
where
	I: FromStr<Err = ParseFloatError>,
{
	let offset = start.span.offset as usize;
	match start.kind {
		t!("-") | t!("+") => {
			eat_digits1(lexer, offset)?;
		}
		TokenKind::Digits => {}
		x => bail!("Unexpected token {x}, expected floating point number",@start.span),
	};

	let before_mantissa = lexer.reader.offset();
	if lexer.eat(b'.') {
		eat_digits1(lexer, before_mantissa)?;
	}

	let before_exponent = lexer.reader.offset();
	if lexer.eat(b'e') || lexer.eat(b'E') {
		if !lexer.eat(b'-') {
			lexer.eat(b'+');
		}

		eat_digits1(lexer, before_exponent)?;
	}

	let number_span = lexer.current_span();

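	// An optional `f` suffix is consumed here but excluded from `number_span`, so it is not part of the parsed string.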
	lexer.eat(b'f');

	if has_ident_after(lexer) {
		let char = lexer.reader.next().unwrap();
		let char = lexer.reader.convert_to_char(char)?;
		bail!("Invalid token, found invalid character `{char}` after number token", @lexer.current_span())
	}

	let str = prepare_number_str(lexer.span_str(number_span));
	str.parse()
		.map_err(|e| syntax_error!("Invalid floating point number: {e}", @lexer.current_span()))
}

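/// Lexes and parses a duration literal, such as `1h30m20s`, into a [`Duration`].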
pub fn duration(lexer: &mut Lexer, start: Token) -> Result<Duration, SyntaxError> {
	match start.kind {
		TokenKind::Digits => {}
		x => bail!("Unexpected token {x}, expected duration", @start.span),
	}

	let mut duration = Duration::ZERO;

	let mut number_span = start.span;
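	// A duration literal is a sequence of `<digits><suffix>` pairs; convert each pair and sum them.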
	loop {
		let suffix = lex_duration_suffix(lexer)?;

		let numeric_string = prepare_number_str(lexer.span_str(number_span));
		let numeric_value: u64 = numeric_string.parse().map_err(
			|e| syntax_error!("Invalid token, failed to parse duration digits: {e}",@lexer.current_span()),
		)?;

		let addition = match suffix {
			DurationSuffix::Nano => Duration::from_nanos(numeric_value),
			DurationSuffix::Micro => Duration::from_micros(numeric_value),
			DurationSuffix::Milli => Duration::from_millis(numeric_value),
			DurationSuffix::Second => Duration::from_secs(numeric_value),
			DurationSuffix::Minute => {
				let minutes = numeric_value.checked_mul(SECONDS_PER_MINUTE).ok_or_else(
					|| syntax_error!("Invalid duration, value overflowed maximum allowed value", @lexer.current_span()),
				)?;
				Duration::from_secs(minutes)
			}
			DurationSuffix::Hour => {
				let hours = numeric_value.checked_mul(SECONDS_PER_HOUR).ok_or_else(
					|| syntax_error!("Invalid duration, value overflowed maximum allowed value", @lexer.current_span()),
				)?;
				Duration::from_secs(hours)
			}
			DurationSuffix::Day => {
				let day = numeric_value.checked_mul(SECONDS_PER_DAY).ok_or_else(
					|| syntax_error!("Invalid duration, value overflowed maximum allowed value", @lexer.current_span()),
				)?;
				Duration::from_secs(day)
			}
			DurationSuffix::Week => {
				let week = numeric_value.checked_mul(SECONDS_PER_WEEK).ok_or_else(
					|| syntax_error!("Invalid duration, value overflowed maximum allowed value", @lexer.current_span()),
				)?;
				Duration::from_secs(week)
			}
			DurationSuffix::Year => {
				let year = numeric_value.checked_mul(SECONDS_PER_YEAR).ok_or_else(
					|| syntax_error!("Invalid duration, value overflowed maximum allowed value", @lexer.current_span()),
				)?;
				Duration::from_secs(year)
			}
		};

		duration = duration.checked_add(addition).ok_or_else(
			|| syntax_error!("Invalid duration, value overflowed maximum allowed value", @lexer.current_span()),
		)?;

		match lexer.reader.peek() {
			Some(x) if x.is_ascii_digit() => {
				let before = lexer.reader.offset();
				eat_digits(lexer);
				number_span = lexer.span_since(before);
			}
			_ => break,
		}
	}

	Ok(duration)
}

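/// Lexes a single duration suffix: `ns`, `us`/`µs`, `ms`, `s`, `m`, `h`, `d`, `w`, or `y`.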
fn lex_duration_suffix(lexer: &mut Lexer) -> Result<DurationSuffix, SyntaxError> {
	let suffix = match lexer.reader.next() {
		Some(b'n') => {
			lexer.expect('s')?;
			DurationSuffix::Nano
		}
		Some(b'u') => {
			lexer.expect('s')?;
			DurationSuffix::Micro
		}
		Some(b'm') => {
			if lexer.eat(b's') {
				DurationSuffix::Milli
			} else {
				DurationSuffix::Minute
			}
		}
		Some(b's') => DurationSuffix::Second,
		Some(b'h') => DurationSuffix::Hour,
		Some(b'd') => DurationSuffix::Day,
		Some(b'w') => DurationSuffix::Week,
		Some(b'y') => DurationSuffix::Year,
		// Start byte of 'µ'
		Some(0xC2) => {
			if !lexer.eat(0xB5) {
				let char = lexer.reader.complete_char(0xC2)?;
				bail!("Invalid duration token, expected a duration suffix, found `{char}`",@lexer.current_span())
			}
			lexer.expect('s')?;
			DurationSuffix::Micro
		}
		Some(x) => {
			let char = lexer.reader.convert_to_char(x)?;
			bail!("Invalid duration token, expected a duration suffix, found `{char}`",@lexer.current_span())
		}
		None => {
			bail!("Unexpected end of file, expected a duration suffix",@lexer.current_span())
		}
	};

	if has_ident_after(lexer) {
		let char = lexer.reader.next().unwrap();
		let char = lexer.reader.convert_to_char(char)?;
		bail!("Invalid token, found invalid character `{char}` after duration suffix", @lexer.current_span())
	}

	Ok(suffix)
}

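/// Checks whether the next byte would continue an identifier (a non-ASCII byte or an ASCII letter).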
fn has_ident_after(lexer: &mut Lexer) -> bool {
	match lexer.reader.peek() {
		Some(x) => !x.is_ascii() || x.is_ascii_alphabetic(),
		None => false,
	}
}

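/// Eats one or more digits (and `_` separators), or errors if the next byte is not a digit.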
fn eat_digits1(lexer: &mut Lexer, start: usize) -> Result<(), SyntaxError> {
	match lexer.reader.peek() {
		Some(x) if x.is_ascii_digit() => {}
		Some(x) => {
			let char = lexer.reader.convert_to_char(x)?;
			bail!("Invalid number token, expected a digit, found: {char}", @lexer.span_since(start));
		}
		None => {
			bail!("Unexpected end of file, expected a number token digit", @lexer.span_since(start));
		}
	}

	eat_digits(lexer);
	Ok(())
}

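/// Eats zero or more digits and `_` digit separators.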
fn eat_digits(lexer: &mut Lexer) {
	while lexer.eat_when(|x| x.is_ascii_digit() || x == b'_') {}
}