surrealdb_core/syn/parser/
expression.rs

1//! This module defines the pratt parser for operators.
2
3use std::ops::Bound;
4
5use reblessive::Stk;
6
7use super::mac::{expected_whitespace, unexpected};
8use crate::sql::Range;
9use crate::sql::{value::TryNeg, Cast, Expression, Number, Operator, Value};
10use crate::syn::error::bail;
11use crate::syn::token::{self, Token};
12use crate::syn::{
13	parser::{mac::expected, ParseResult, Parser},
14	token::{t, TokenKind},
15};
16
17/// An enum which defines how strong a operator binds it's operands.
18///
19/// If a binding power is higher the operator is more likely to directly operate on it's
20/// neighbours.
21#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)]
22pub enum BindingPower {
23	Base,
24	Or,
25	And,
26	Equality,
27	Relation,
28	AddSub,
29	MulDiv,
30	Power,
31	Cast,
32	Range,
33	Nullish,
34	Unary,
35}
36
37impl Parser<'_> {
38	/// Parsers a generic value.
39	///
40	/// A generic loose ident like `foo` in for example `foo.bar` can be two different values
41	/// depending on context: a table or a field the current document. This function parses loose
42	/// idents as a table, see [`parse_value_field`] for parsing loose idents as fields
43	pub async fn parse_value_table(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
44		let old = self.table_as_field;
45		self.table_as_field = false;
46		let res = self.pratt_parse_expr(ctx, BindingPower::Base).await;
47		self.table_as_field = old;
48		res
49	}
50
51	/// Parsers a generic value.
52	///
53	/// A generic loose ident like `foo` in for example `foo.bar` can be two different values
54	/// depending on context: a table or a field the current document. This function parses loose
55	/// idents as a field, see [`parse_value`] for parsing loose idents as table
56	pub(crate) async fn parse_value_field(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
57		let old = self.table_as_field;
58		self.table_as_field = true;
59		let res = self.pratt_parse_expr(ctx, BindingPower::Base).await;
60		self.table_as_field = old;
61		res
62	}
63
64	/// Parsers a generic value.
65	///
66	/// Inherits how loose identifiers are parsed from it's caller.
67	pub(super) async fn parse_value_inherit(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
68		self.pratt_parse_expr(ctx, BindingPower::Base).await
69	}
70
71	/// Parse a assigner operator.
72	pub(super) fn parse_assigner(&mut self) -> ParseResult<Operator> {
73		let token = self.next();
74		match token.kind {
75			t!("=") => Ok(Operator::Equal),
76			t!("+=") => Ok(Operator::Inc),
77			t!("-=") => Ok(Operator::Dec),
78			t!("+?=") => Ok(Operator::Ext),
79			_ => unexpected!(self, token, "an assign operator"),
80		}
81	}
82
83	/// Returns the binding power of an infix operator.
84	///
85	/// Binding power is the opposite of precendence: a higher binding power means that a token is
86	/// more like to operate directly on it's neighbours. Example `*` has a higher binding power
87	/// than `-` resulting in 1 - 2 * 3 being parsed as 1 - (2 * 3).
88	///
89	/// All operators in SurrealQL which are parsed by the functions in this module are left
90	/// associative or have no defined associativity.
91	fn infix_binding_power(&mut self, token: TokenKind) -> Option<BindingPower> {
92		// TODO: Look at ordering of operators.
93		match token {
94			// assigment operators have the lowest binding power.
95			//t!("+=") | t!("-=") | t!("+?=") => Some((2, 1)),
96			t!("||") | t!("OR") => Some(BindingPower::Or),
97			t!("&&") | t!("AND") => Some(BindingPower::And),
98
99			// Equality operators have same binding power.
100			t!("=")
101			| t!("IS")
102			| t!("==")
103			| t!("!=")
104			| t!("*=")
105			| t!("?=")
106			| t!("~")
107			| t!("!~")
108			| t!("*~")
109			| t!("?~")
110			| t!("@") => Some(BindingPower::Equality),
111
112			t!("<") => {
113				let peek = self.peek_whitespace1();
114				if matches!(peek.kind, t!("-") | t!("->") | t!("..")) {
115					return None;
116				}
117				Some(BindingPower::Relation)
118			}
119
120			t!(">") => {
121				if self.peek_whitespace1().kind == t!("..") {
122					return Some(BindingPower::Range);
123				}
124				Some(BindingPower::Relation)
125			}
126
127			t!("..") => Some(BindingPower::Range),
128
129			t!("<=")
130			| t!(">=")
131			| t!("∋")
132			| t!("CONTAINS")
133			| t!("∌")
134			| t!("CONTAINSNOT")
135			| t!("∈")
136			| t!("INSIDE")
137			| t!("∉")
138			| t!("NOTINSIDE")
139			| t!("⊇")
140			| t!("CONTAINSALL")
141			| t!("⊃")
142			| t!("CONTAINSANY")
143			| t!("⊅")
144			| t!("CONTAINSNONE")
145			| t!("⊆")
146			| t!("ALLINSIDE")
147			| t!("⊂")
148			| t!("ANYINSIDE")
149			| t!("⊄")
150			| t!("NONEINSIDE")
151			| t!("OUTSIDE")
152			| t!("INTERSECTS")
153			| t!("NOT")
154			| t!("IN")
155			| t!("<|") => Some(BindingPower::Relation),
156
157			t!("+") | t!("-") => Some(BindingPower::AddSub),
158			t!("*") | t!("×") | t!("/") | t!("÷") | t!("%") => Some(BindingPower::MulDiv),
159			t!("**") => Some(BindingPower::Power),
160			t!("?:") | t!("??") => Some(BindingPower::Nullish),
161			_ => None,
162		}
163	}
164
165	fn prefix_binding_power(&mut self, token: TokenKind) -> Option<BindingPower> {
166		match token {
167			t!("!") | t!("+") | t!("-") => Some(BindingPower::Unary),
168			t!("..") => Some(BindingPower::Range),
169			t!("<") => {
170				let peek = self.peek1();
171				if matches!(peek.kind, t!("-") | t!("->") | t!("FUTURE")) {
172					return None;
173				}
174				Some(BindingPower::Cast)
175			}
176			_ => None,
177		}
178	}
179
180	async fn parse_prefix_op(&mut self, ctx: &mut Stk, min_bp: BindingPower) -> ParseResult<Value> {
181		let token = self.peek();
182		let operator = match token.kind {
183			t!("+") => {
184				// +123 is a single number token, so parse it as such
185				let p = self.peek_whitespace1();
186				if matches!(p.kind, TokenKind::Digits) {
187					// This is a bit of an annoying special case.
188					// The problem is that `+` and `-` can be an prefix operator and a the start
189					// of a number token.
190					// To figure out which it is we need to peek the next whitespace token,
191					// This eats the digits that the lexer needs to lex the number. So we we need
192					// to backup before the digits token was consumed, clear the digits token from
193					// the token buffer so it isn't popped after parsing the number and then lex the
194					// number.
195					self.lexer.backup_before(p.span);
196					self.token_buffer.clear();
197					self.token_buffer.push(token);
198					return self.next_token_value::<Number>().map(Value::Number);
199				}
200				self.pop_peek();
201
202				Operator::Add
203			}
204			t!("-") => {
205				// -123 is a single number token, so parse it as such
206				let p = self.peek_whitespace1();
207				if matches!(p.kind, TokenKind::Digits) {
208					// This is a bit of an annoying special case.
209					// The problem is that `+` and `-` can be an prefix operator and a the start
210					// of a number token.
211					// To figure out which it is we need to peek the next whitespace token,
212					// This eats the digits that the lexer needs to lex the number. So we we need
213					// to backup before the digits token was consumed, clear the digits token from
214					// the token buffer so it isn't popped after parsing the number and then lex the
215					// number.
216					self.lexer.backup_before(p.span);
217					self.token_buffer.clear();
218					self.token_buffer.push(token);
219					return self.next_token_value::<Number>().map(Value::Number);
220				}
221
222				self.pop_peek();
223
224				Operator::Neg
225			}
226			t!("!") => {
227				self.pop_peek();
228				Operator::Not
229			}
230			t!("<") => {
231				self.pop_peek();
232				let kind = self.parse_kind(ctx, token.span).await?;
233				let value = ctx.run(|ctx| self.pratt_parse_expr(ctx, BindingPower::Cast)).await?;
234				let cast = Cast(kind, value);
235				return Ok(Value::Cast(Box::new(cast)));
236			}
237			t!("..") => return self.parse_prefix_range(ctx).await,
238			// should be unreachable as we previously check if the token was a prefix op.
239			_ => unreachable!(),
240		};
241
242		let v = ctx.run(|ctx| self.pratt_parse_expr(ctx, min_bp)).await?;
243
244		// HACK: For compatiblity with the old parser apply + and - operator immediately if the
245		// left value is a number.
246		if let Value::Number(number) = v {
247			// If the number was already negative we already did apply a - so just return a unary
248			// in this case.
249			if number.is_positive() {
250				if let Operator::Neg = operator {
251					// this can only panic if `number` is i64::MIN which currently can't be parsed.
252					return Ok(Value::Number(number.try_neg().unwrap()));
253				}
254			}
255
256			if let Operator::Add = operator {
257				// doesn't do anything.
258				return Ok(Value::Number(number));
259			}
260			Ok(Value::Expression(Box::new(Expression::Unary {
261				o: operator,
262				v: Value::Number(number),
263			})))
264		} else {
265			Ok(Value::Expression(Box::new(Expression::Unary {
266				o: operator,
267				v,
268			})))
269		}
270	}
271
272	pub(super) fn parse_knn(&mut self, token: Token) -> ParseResult<Operator> {
273		let amount = self.next_token_value()?;
274		let op = if self.eat(t!(",")) {
275			let token = self.peek();
276			match token.kind {
277				TokenKind::Distance(_) => {
278					let d = self.parse_distance().map(Some)?;
279					Operator::Knn(amount, d)
280				}
281				TokenKind::Digits | TokenKind::Glued(token::Glued::Number) => {
282					let ef = self.next_token_value()?;
283					Operator::Ann(amount, ef)
284				}
285				_ => {
286					bail!("Unexpected token {} expected a distance of an integer", token.kind,
287						@token.span => "The NN operator accepts either a distance or an EF value (integer)")
288				}
289			}
290		} else {
291			Operator::Knn(amount, None)
292		};
293		self.expect_closing_delimiter(t!("|>"), token.span)?;
294		Ok(op)
295	}
296
297	fn expression_is_relation(value: &Value) -> bool {
298		if let Value::Expression(x) = value {
299			return Self::operator_is_relation(x.operator());
300		}
301		false
302	}
303
304	fn operator_is_relation(operator: &Operator) -> bool {
305		matches!(
306			operator,
307			Operator::Equal
308				| Operator::NotEqual
309				| Operator::AllEqual
310				| Operator::AnyEqual
311				| Operator::NotLike
312				| Operator::AllLike
313				| Operator::AnyLike
314				| Operator::Like
315				| Operator::Contain
316				| Operator::NotContain
317				| Operator::NotInside
318				| Operator::ContainAll
319				| Operator::ContainNone
320				| Operator::AllInside
321				| Operator::AnyInside
322				| Operator::NoneInside
323				| Operator::Outside
324				| Operator::Intersects
325				| Operator::Inside
326				| Operator::Knn(_, _)
327		)
328	}
329
330	async fn parse_infix_op(
331		&mut self,
332		ctx: &mut Stk,
333		min_bp: BindingPower,
334		lhs: Value,
335	) -> ParseResult<Value> {
336		let token = self.next();
337		let operator = match token.kind {
338			// TODO: change operator name?
339			t!("||") | t!("OR") => Operator::Or,
340			t!("&&") | t!("AND") => Operator::And,
341			t!("?:") => Operator::Tco,
342			t!("??") => Operator::Nco,
343			t!("==") => Operator::Exact,
344			t!("!=") => Operator::NotEqual,
345			t!("*=") => Operator::AllEqual,
346			t!("?=") => Operator::AnyEqual,
347			t!("=") => Operator::Equal,
348			t!("!~") => Operator::NotLike,
349			t!("*~") => Operator::AllLike,
350			t!("?~") => Operator::AnyLike,
351			t!("~") => Operator::Like,
352			t!("@") => {
353				let reference = (!self.eat(t!("@")))
354					.then(|| {
355						let number = self.next_token_value()?;
356						expected!(self, t!("@"));
357						Ok(number)
358					})
359					.transpose()?;
360				Operator::Matches(reference)
361			}
362			t!("<=") => Operator::LessThanOrEqual,
363			t!("<") => Operator::LessThan,
364			t!(">=") => Operator::MoreThanOrEqual,
365			t!("**") => Operator::Pow,
366			t!("+") => Operator::Add,
367			t!("-") => Operator::Sub,
368			t!("*") | t!("×") => Operator::Mul,
369			t!("/") | t!("÷") => Operator::Div,
370			t!("%") => Operator::Rem,
371			t!("∋") | t!("CONTAINS") => Operator::Contain,
372			t!("∌") | t!("CONTAINSNOT") => Operator::NotContain,
373			t!("∈") | t!("INSIDE") => Operator::Inside,
374			t!("∉") | t!("NOTINSIDE") => Operator::NotInside,
375			t!("⊇") | t!("CONTAINSALL") => Operator::ContainAll,
376			t!("⊃") | t!("CONTAINSANY") => Operator::ContainAny,
377			t!("⊅") | t!("CONTAINSNONE") => Operator::ContainNone,
378			t!("⊆") | t!("ALLINSIDE") => Operator::AllInside,
379			t!("⊂") | t!("ANYINSIDE") => Operator::AnyInside,
380			t!("⊄") | t!("NONEINSIDE") => Operator::NoneInside,
381			t!("IS") => {
382				if self.eat(t!("NOT")) {
383					Operator::NotEqual
384				} else {
385					Operator::Equal
386				}
387			}
388			t!("OUTSIDE") => Operator::Outside,
389			t!("INTERSECTS") => Operator::Intersects,
390			t!("NOT") => {
391				expected!(self, t!("IN"));
392				Operator::NotInside
393			}
394			t!("IN") => Operator::Inside,
395			t!("<|") => self.parse_knn(token)?,
396
397			t!(">") => {
398				if self.peek_whitespace().kind == t!("..") {
399					self.pop_peek();
400					return self.parse_infix_range(ctx, true, lhs).await;
401				}
402				Operator::MoreThan
403			}
404			t!("..") => {
405				return self.parse_infix_range(ctx, false, lhs).await;
406			}
407
408			// should be unreachable as we previously check if the token was a prefix op.
409			x => unreachable!("found non-operator token {x:?}"),
410		};
411		let before = self.recent_span();
412		let rhs = ctx.run(|ctx| self.pratt_parse_expr(ctx, min_bp)).await?;
413
414		if Self::operator_is_relation(&operator) && Self::expression_is_relation(&lhs) {
415			let span = before.covers(self.recent_span());
416			// 1 >= 2 >= 3 has no defined associativity and is often a mistake.
417			bail!("Chaining relational operators have no defined associativity.",
418				@span => "Use parens, '()', to specify which operator must be evaluated first")
419		}
420
421		Ok(Value::Expression(Box::new(Expression::Binary {
422			l: lhs,
423			o: operator,
424			r: rhs,
425		})))
426	}
427
428	async fn parse_infix_range(
429		&mut self,
430		ctx: &mut Stk,
431		exclusive: bool,
432		lhs: Value,
433	) -> ParseResult<Value> {
434		let inclusive = self.eat_whitespace(t!("="));
435
436		let before = self.recent_span();
437		let peek = self.peek_whitespace();
438		let rhs = if inclusive {
439			// ..= must be followed by an expression.
440			if peek.kind == TokenKind::WhiteSpace {
441				bail!("Unexpected whitespace, expected inclusive range to be immediately followed by a expression",
442					@peek.span => "Whitespace between a range and it's operands is dissallowed")
443			}
444			ctx.run(|ctx| self.pratt_parse_expr(ctx, BindingPower::Range)).await?
445		} else if Self::kind_starts_expression(peek.kind) {
446			ctx.run(|ctx| self.pratt_parse_expr(ctx, BindingPower::Range)).await?
447		} else {
448			return Ok(Value::Range(Box::new(Range {
449				beg: if exclusive {
450					Bound::Excluded(lhs)
451				} else {
452					Bound::Included(lhs)
453				},
454				end: Bound::Unbounded,
455			})));
456		};
457
458		if matches!(lhs, Value::Range(_)) {
459			let span = before.covers(self.recent_span());
460			// a..b..c is ambiguous, so throw an error
461			bail!("Chaining range operators has no specified associativity",
462				@span => "use parens, '()', to specify which operator must be evaluated first")
463		}
464
465		Ok(Value::Range(Box::new(Range {
466			beg: if exclusive {
467				Bound::Excluded(lhs)
468			} else {
469				Bound::Included(lhs)
470			},
471			end: if inclusive {
472				Bound::Included(rhs)
473			} else {
474				Bound::Excluded(rhs)
475			},
476		})))
477	}
478
479	async fn parse_prefix_range(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
480		expected_whitespace!(self, t!(".."));
481		let inclusive = self.eat_whitespace(t!("="));
482		let before = self.recent_span();
483		let peek = self.peek_whitespace();
484		let rhs = if inclusive {
485			// ..= must be followed by an expression.
486			if peek.kind == TokenKind::WhiteSpace {
487				bail!("Unexpected whitespace, expected inclusive range to be immediately followed by a expression",
488					@peek.span => "Whitespace between a range and it's operands is dissallowed")
489			}
490			ctx.run(|ctx| self.pratt_parse_expr(ctx, BindingPower::Range)).await?
491		} else if Self::kind_starts_expression(peek.kind) {
492			ctx.run(|ctx| self.pratt_parse_expr(ctx, BindingPower::Range)).await?
493		} else {
494			return Ok(Value::Range(Box::new(Range {
495				beg: Bound::Unbounded,
496				end: Bound::Unbounded,
497			})));
498		};
499
500		if matches!(rhs, Value::Range(_)) {
501			let span = before.covers(self.recent_span());
502			// a..b..c is ambiguous, so throw an error
503			bail!("Chaining range operators has no specified associativity",
504						@span => "use parens, '()', to specify which operator must be evaluated first")
505		}
506
507		let range = Range {
508			beg: Bound::Unbounded,
509			end: if inclusive {
510				Bound::Included(rhs)
511			} else {
512				Bound::Excluded(rhs)
513			},
514		};
515		Ok(Value::Range(Box::new(range)))
516	}
517
518	/// The pratt parsing loop.
519	/// Parses expression according to binding power.
520	async fn pratt_parse_expr(
521		&mut self,
522		ctx: &mut Stk,
523		min_bp: BindingPower,
524	) -> ParseResult<Value> {
525		let peek = self.peek();
526		let mut lhs = if let Some(bp) = self.prefix_binding_power(peek.kind) {
527			self.parse_prefix_op(ctx, bp).await?
528		} else {
529			self.parse_idiom_expression(ctx).await?
530		};
531
532		loop {
533			let token = self.peek();
534			let Some(bp) = self.infix_binding_power(token.kind) else {
535				// explain that assignment operators can't be used in normal expressions.
536				if let t!("+=") | t!("*=") | t!("-=") | t!("+?=") = token.kind {
537					unexpected!(self,token,"an operator",
538						=> "assignment operators are only allowed in SET and DUPLICATE KEY UPDATE clauses")
539				}
540				break;
541			};
542
543			if bp <= min_bp {
544				break;
545			}
546
547			lhs = self.parse_infix_op(ctx, bp, lhs).await?;
548		}
549
550		Ok(lhs)
551	}
552}
553
554#[cfg(test)]
555mod test {
556	use super::*;
557	use crate::sql::{Block, Future, Kind};
558	use crate::syn::Parse;
559
560	#[test]
561	fn cast_int() {
562		let sql = "<int>1.2345";
563		let out = Value::parse(sql);
564		assert_eq!("<int> 1.2345f", format!("{}", out));
565		assert_eq!(out, Value::from(Cast(Kind::Int, 1.2345.into())));
566	}
567
568	#[test]
569	fn cast_string() {
570		let sql = "<string>1.2345";
571		let out = Value::parse(sql);
572		assert_eq!("<string> 1.2345f", format!("{}", out));
573		assert_eq!(out, Value::from(Cast(Kind::String, 1.2345.into())));
574	}
575
576	#[test]
577	fn expression_statement() {
578		let sql = "true AND false";
579		let out = Value::parse(sql);
580		assert_eq!("true AND false", format!("{}", out));
581	}
582
583	#[test]
584	fn expression_left_opened() {
585		let sql = "3 * 3 * 3 = 27";
586		let out = Value::parse(sql);
587		assert_eq!("3 * 3 * 3 = 27", format!("{}", out));
588	}
589
590	#[test]
591	fn expression_left_closed() {
592		let sql = "(3 * 3 * 3) = 27";
593		let out = Value::parse(sql);
594		assert_eq!("(3 * 3 * 3) = 27", format!("{}", out));
595	}
596
597	#[test]
598	fn expression_right_opened() {
599		let sql = "27 = 3 * 3 * 3";
600		let out = Value::parse(sql);
601		assert_eq!("27 = 3 * 3 * 3", format!("{}", out));
602	}
603
604	#[test]
605	fn expression_right_closed() {
606		let sql = "27 = (3 * 3 * 3)";
607		let out = Value::parse(sql);
608		assert_eq!("27 = (3 * 3 * 3)", format!("{}", out));
609	}
610
611	#[test]
612	fn expression_both_opened() {
613		let sql = "3 * 3 * 3 = 3 * 3 * 3";
614		let out = Value::parse(sql);
615		assert_eq!("3 * 3 * 3 = 3 * 3 * 3", format!("{}", out));
616	}
617
618	#[test]
619	fn expression_both_closed() {
620		let sql = "(3 * 3 * 3) = (3 * 3 * 3)";
621		let out = Value::parse(sql);
622		assert_eq!("(3 * 3 * 3) = (3 * 3 * 3)", format!("{}", out));
623	}
624
625	#[test]
626	fn expression_unary() {
627		let sql = "-a";
628		let out = Value::parse(sql);
629		assert_eq!(sql, format!("{}", out));
630	}
631
632	#[test]
633	fn expression_with_unary() {
634		let sql = "-(5) + 5";
635		let out = Value::parse(sql);
636		assert_eq!(sql, format!("{}", out));
637	}
638
639	#[test]
640	fn expression_left_associative() {
641		let sql = "1 - 1 - 1";
642		let out = Value::parse(sql);
643		let one = Value::Number(Number::Int(1));
644		let expected = Value::Expression(Box::new(Expression::Binary {
645			l: Value::Expression(Box::new(Expression::Binary {
646				l: one.clone(),
647				o: Operator::Sub,
648				r: one.clone(),
649			})),
650			o: Operator::Sub,
651			r: one,
652		}));
653		assert_eq!(expected, out);
654	}
655
656	#[test]
657	fn parse_expression() {
658		let sql = "<future> { 5 + 10 }";
659		let out = Value::parse(sql);
660		assert_eq!("<future> { 5 + 10 }", format!("{}", out));
661		assert_eq!(
662			out,
663			Value::from(Future(Block::from(Value::from(Expression::Binary {
664				l: Value::Number(Number::Int(5)),
665				o: Operator::Add,
666				r: Value::Number(Number::Int(10))
667			}))))
668		);
669	}
670}