surrealdb_core/syn/token/
mod.rs

1//! Module specifying the token representation of the parser.
2
3use std::{fmt, hash::Hash};
4
5mod keyword;
6pub(crate) use keyword::keyword_t;
7pub use keyword::Keyword;
8mod mac;
9use crate::sql::{language::Language, Algorithm};
10pub(crate) use mac::t;
11
/// A location in the source passed to the lexer.
///
/// A span is described by a byte `offset` and a byte `len`; the byte directly
/// after the span therefore sits at `offset + len`.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
#[non_exhaustive]
pub struct Span {
	/// Offset in bytes.
	pub offset: u32,
	/// The amount of bytes this location encompasses.
	pub len: u32,
}

impl Span {
	/// Create a new empty span: offset `0`, length `0`.
	pub const fn empty() -> Self {
		Span {
			offset: 0,
			len: 0,
		}
	}

	/// Returns whether the span encompasses zero bytes.
	pub fn is_empty(&self) -> bool {
		self.len == 0
	}

	/// Create a span that covers the range of both spans as well as possible space in between.
	pub fn covers(self, other: Span) -> Span {
		let start = self.offset.min(other.offset);
		let end = (self.offset + self.len).max(other.offset + other.len);
		let len = end - start;
		Span {
			offset: start,
			len,
		}
	}

	/// Returns a zero-length span that starts directly after the current span.
	pub fn after(self) -> Span {
		Span {
			offset: self.offset + self.len,
			len: 0,
		}
	}

	/// Returns the byte offset directly after the end of the current span.
	pub fn after_offset(self) -> u32 {
		self.offset + self.len
	}

	/// Returns if the given span is the next span after this one, i.e. `other`
	/// starts exactly where `self` ends.
	pub fn is_followed_by(&self, other: &Self) -> bool {
		// Widen to u64 so the sum of two u32 values can never overflow.
		let end = u64::from(self.offset) + u64::from(self.len);
		u64::from(other.offset) == end
	}

	/// Returns if this span immediately follows the given, i.e. `self` starts
	/// exactly where `other` ends.
	///
	/// This is the mirror image of [`Span::is_followed_by`]:
	/// `a.follows_from(&b)` holds exactly when `b.is_followed_by(&a)` holds.
	pub fn follows_from(&self, other: &Self) -> bool {
		other.is_followed_by(self)
	}
}
70
/// The operators which can appear as a token.
///
/// The enum is field-less and `#[repr(u8)]`, so the declaration order below
/// fixes each variant's discriminant.
#[repr(u8)]
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
#[non_exhaustive]
pub enum Operator {
	/// The `!` operator.
	Not,
	/// The `+` operator.
	Add,
	/// The `-` operator.
	Subtract,
	/// The `÷` operator.
	Divide,
	/// The `×` or `∙` operator.
	Mult,
	/// The `%` operator.
	Modulo,
	/// The `||` operator.
	Or,
	/// The `&&` operator.
	And,
	/// The `<=` operator.
	LessEqual,
	/// The `>=` operator.
	GreaterEqual,
	/// The `*` operator.
	Star,
	/// The `**` operator.
	Power,
	/// The `=` operator.
	Equal,
	/// The `==` operator.
	Exact,
	/// The `!=` operator.
	NotEqual,
	/// The `*=` operator.
	AllEqual,
	/// The `?=` operator.
	AnyEqual,
	/// The `~` operator.
	Like,
	/// The `!~` operator.
	NotLike,
	/// The `*~` operator.
	AllLike,
	/// The `?~` operator.
	AnyLike,
	/// The `∋` operator.
	Contains,
	/// The `∌` operator.
	NotContains,
	/// The `⊇` operator.
	ContainsAll,
	/// The `⊃` operator.
	ContainsAny,
	/// The `⊅` operator.
	ContainsNone,
	/// The `∈` operator.
	Inside,
	/// The `∉` operator.
	NotInside,
	/// The `⊆` operator.
	AllInside,
	/// The `⊂` operator.
	AnyInside,
	/// The `⊄` operator.
	NoneInside,
	/// The matches operator, written like `@123@`.
	Matches,
	/// The `+=` operator.
	Inc,
	/// The `-=` operator.
	Dec,
	/// The `+?=` operator.
	Ext,
	/// The `?:` operator.
	Tco,
	/// The `??` operator.
	Nco,
	/// The `<|` operator.
	KnnOpen,
	/// The `|>` operator.
	KnnClose,
}

impl Operator {
	/// Returns the textual form of the operator.
	///
	/// Exactly one spelling is returned per variant: `Mult` yields `×` and
	/// `Matches` yields `@@`. Arms are listed in declaration order.
	fn as_str(&self) -> &'static str {
		match *self {
			Self::Not => "!",
			Self::Add => "+",
			Self::Subtract => "-",
			Self::Divide => "÷",
			Self::Mult => "×",
			Self::Modulo => "%",
			Self::Or => "||",
			Self::And => "&&",
			Self::LessEqual => "<=",
			Self::GreaterEqual => ">=",
			Self::Star => "*",
			Self::Power => "**",
			Self::Equal => "=",
			Self::Exact => "==",
			Self::NotEqual => "!=",
			Self::AllEqual => "*=",
			Self::AnyEqual => "?=",
			Self::Like => "~",
			Self::NotLike => "!~",
			Self::AllLike => "*~",
			Self::AnyLike => "?~",
			Self::Contains => "∋",
			Self::NotContains => "∌",
			Self::ContainsAll => "⊇",
			Self::ContainsAny => "⊃",
			Self::ContainsNone => "⊅",
			Self::Inside => "∈",
			Self::NotInside => "∉",
			Self::AllInside => "⊆",
			Self::AnyInside => "⊂",
			Self::NoneInside => "⊄",
			Self::Matches => "@@",
			Self::Inc => "+=",
			Self::Dec => "-=",
			Self::Ext => "+?=",
			Self::Tco => "?:",
			Self::Nco => "??",
			Self::KnnOpen => "<|",
			Self::KnnClose => "|>",
		}
	}
}
200
/// A delimiting token, denoting the start or end of a certain production.
///
/// A variant only names the delimiter pair; whether the token is the opening
/// or the closing half is expressed by the `TokenKind::OpenDelim` /
/// `TokenKind::CloseDelim` variant which wraps it.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
#[non_exhaustive]
pub enum Delim {
	/// Parentheses: `(` and `)`.
	Paren,
	/// Square brackets: `[` and `]`.
	Bracket,
	/// Curly braces: `{` and `}`.
	Brace,
}
212
/// The distance kinds which have their own token, see `TokenKind::Distance`.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
#[non_exhaustive]
pub enum DistanceKind {
	Chebyshev,
	Cosine,
	Euclidean,
	Hamming,
	Jaccard,
	Manhattan,
	Minkowski,
	Pearson,
}

impl DistanceKind {
	/// Returns the variant name in upper-case, e.g. `"COSINE"` for `Cosine`.
	pub fn as_str(&self) -> &'static str {
		match *self {
			Self::Chebyshev => "CHEBYSHEV",
			Self::Cosine => "COSINE",
			Self::Euclidean => "EUCLIDEAN",
			Self::Hamming => "HAMMING",
			Self::Jaccard => "JACCARD",
			Self::Manhattan => "MANHATTAN",
			Self::Minkowski => "MINKOWSKI",
			Self::Pearson => "PEARSON",
		}
	}
}
240
/// The vector types which have their own token, see `TokenKind::VectorType`.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
#[non_exhaustive]
pub enum VectorTypeKind {
	F64,
	F32,
	I64,
	I32,
	I16,
}

impl VectorTypeKind {
	/// Returns the name of the vector type, identical to the variant name.
	pub fn as_str(&self) -> &'static str {
		match *self {
			VectorTypeKind::F64 => "F64",
			VectorTypeKind::F32 => "F32",
			VectorTypeKind::I64 => "I64",
			VectorTypeKind::I32 => "I32",
			VectorTypeKind::I16 => "I16",
		}
	}
}
262
263impl Algorithm {
264	pub fn as_str(&self) -> &'static str {
265		match self {
266			Self::EdDSA => "EDDSA",
267			Self::Es256 => "ES256",
268			Self::Es384 => "ES384",
269			Self::Es512 => "ES512",
270			Self::Hs256 => "HS256",
271			Self::Hs384 => "HS384",
272			Self::Hs512 => "HS512",
273			Self::Ps256 => "PS256",
274			Self::Ps384 => "PS384",
275			Self::Ps512 => "PS512",
276			Self::Rs256 => "RS256",
277			Self::Rs384 => "RS384",
278			Self::Rs512 => "RS512",
279		}
280	}
281}
282
/// The opening quote of a string-like token, see `TokenKind::Qoute`.
///
/// Every flavour exists in a single-quoted and a double-quoted form.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum QouteKind {
	/// An opening `'`.
	Plain,
	/// An opening `"`.
	PlainDouble,
	/// An opening `r'`.
	RecordId,
	/// An opening `r"`.
	RecordIdDouble,
	/// An opening `u'`.
	Uuid,
	/// An opening `u"`.
	UuidDouble,
	/// An opening `d'`.
	DateTime,
	/// An opening `d"`.
	DateTimeDouble,
}

impl QouteKind {
	/// Returns a short description of what the quote opens, e.g. `"a uuid"`.
	///
	/// The single- and double-quoted forms of a flavour share one description.
	pub fn as_str(&self) -> &'static str {
		match *self {
			Self::Plain | Self::PlainDouble => "a strand",
			Self::RecordId | Self::RecordIdDouble => "a record-id strand",
			Self::Uuid | Self::UuidDouble => "a uuid",
			Self::DateTime | Self::DateTimeDouble => "a datetime",
		}
	}
}
313
/// The kinds of compound token which can be glued together and put back into
/// the token buffer, see `TokenKind::Glued`.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum Glued {
	Number,
	Duration,
	Strand,
	Datetime,
	Uuid,
}

impl Glued {
	/// Returns a short description of the glued token, e.g. `"a number"`.
	///
	/// Arms are listed in declaration order.
	fn as_str(&self) -> &'static str {
		match *self {
			Self::Number => "a number",
			Self::Duration => "a duration",
			Self::Strand => "a strand",
			Self::Datetime => "a datetime",
			Self::Uuid => "a uuid",
		}
	}
}
334
/// The type of token.
///
/// The size of this type is statically asserted to be two bytes further down
/// in this file, so a new variant may only carry a payload which keeps the
/// type at that size.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
#[non_exhaustive]
pub enum TokenKind {
	/// Whitespace.
	WhiteSpace,
	/// A keyword, see [`Keyword`].
	Keyword(Keyword),
	/// An algorithm name, see [`Algorithm`].
	Algorithm(Algorithm),
	/// A language name, see [`Language`].
	Language(Language),
	/// A distance name, see [`DistanceKind`].
	Distance(DistanceKind),
	/// A vector type name, see [`VectorTypeKind`].
	VectorType(VectorTypeKind),
	/// An operator, see [`Operator`].
	Operator(Operator),
	/// An opening delimiter: `(`, `[` or `{`.
	OpenDelim(Delim),
	/// A closing delimiter: `)`, `]` or `}`.
	CloseDelim(Delim),
	/// A token denoting the opening of a string, e.g. `r"`.
	Qoute(QouteKind),
	/// A parameter like `$name`.
	Parameter,
	/// An identifier.
	Identifier,
	/// `<`
	LeftChefron,
	/// `>`
	RightChefron,
	/// `*`
	Star,
	/// `?`
	Question,
	/// `$`
	Dollar,
	/// `->`
	ArrowRight,
	/// `/`
	ForwardSlash,
	/// `.`
	Dot,
	/// `..`
	DotDot,
	/// `...` or `…`
	DotDotDot,
	/// `;`
	SemiColon,
	/// `::`
	PathSeperator,
	/// `:`
	Colon,
	/// `,`
	Comma,
	/// `|`
	Vert,
	/// `@`
	At,
	/// A token which indicates the end of the file.
	Eof,
	/// A token consisting of one or more ASCII digits.
	Digits,
	/// The Not-A-Number number token.
	NaN,
	/// A token which is a compound token which has been glued together and then put back into the
	/// token buffer. This is required for some places where we need to look past possible compound tokens.
	Glued(Glued),
	/// A token which could not be properly lexed.
	Invalid,
}
397
398impl fmt::Display for TokenKind {
399	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
400		f.write_str(self.as_str())
401	}
402}
403
/// An assertion statically checking that the size of `TokenKind` remains two bytes.
///
/// The declared type is an array of length 2 while the initializer is an array
/// whose length is `size_of::<TokenKind>()`; if the two lengths differ the
/// constant fails to type-check and compilation stops.
const _TOKEN_KIND_SIZE_ASSERT: [(); 2] = [(); std::mem::size_of::<TokenKind>()];
406
407impl TokenKind {
408	pub fn has_data(&self) -> bool {
409		matches!(self, TokenKind::Identifier | TokenKind::Glued(_))
410	}
411
412	fn algorithm_as_str(alg: Algorithm) -> &'static str {
413		match alg {
414			Algorithm::EdDSA => "EDDSA",
415			Algorithm::Es256 => "ES256",
416			Algorithm::Es384 => "ES384",
417			Algorithm::Es512 => "ES512",
418			Algorithm::Hs256 => "HS256",
419			Algorithm::Hs384 => "HS384",
420			Algorithm::Hs512 => "HS512",
421			Algorithm::Ps256 => "PS256",
422			Algorithm::Ps384 => "PS384",
423			Algorithm::Ps512 => "PS512",
424			Algorithm::Rs256 => "RS256",
425			Algorithm::Rs384 => "RS384",
426			Algorithm::Rs512 => "RS512",
427		}
428	}
429
430	pub fn as_str(&self) -> &'static str {
431		match *self {
432			TokenKind::Keyword(x) => x.as_str(),
433			TokenKind::Operator(x) => x.as_str(),
434			TokenKind::Algorithm(x) => Self::algorithm_as_str(x),
435			TokenKind::Language(x) => x.as_str(),
436			TokenKind::Distance(x) => x.as_str(),
437			TokenKind::VectorType(x) => x.as_str(),
438			TokenKind::OpenDelim(Delim::Paren) => "(",
439			TokenKind::OpenDelim(Delim::Brace) => "{",
440			TokenKind::OpenDelim(Delim::Bracket) => "[",
441			TokenKind::CloseDelim(Delim::Paren) => ")",
442			TokenKind::CloseDelim(Delim::Brace) => "}",
443			TokenKind::CloseDelim(Delim::Bracket) => "]",
444			TokenKind::Parameter => "a parameter",
445			TokenKind::Identifier => "an identifier",
446			TokenKind::LeftChefron => "<",
447			TokenKind::RightChefron => ">",
448			TokenKind::Star => "*",
449			TokenKind::Dollar => "$",
450			TokenKind::Question => "?",
451			TokenKind::ArrowRight => "->",
452			TokenKind::ForwardSlash => "/",
453			TokenKind::Dot => ".",
454			TokenKind::DotDot => "..",
455			TokenKind::DotDotDot => "...",
456			TokenKind::SemiColon => ";",
457			TokenKind::PathSeperator => "::",
458			TokenKind::Colon => ":",
459			TokenKind::Comma => ",",
460			TokenKind::Vert => "|",
461			TokenKind::At => "@",
462			TokenKind::Invalid => "Invalid",
463			TokenKind::Eof => "Eof",
464			TokenKind::WhiteSpace => "whitespace",
465			TokenKind::Qoute(x) => x.as_str(),
466			TokenKind::Digits => "a number",
467			TokenKind::NaN => "NaN",
468			TokenKind::Glued(x) => x.as_str(),
469			// below are small broken up tokens which are most of the time identifiers.
470		}
471	}
472}
473
474#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
475pub struct Token {
476	pub kind: TokenKind,
477	pub span: Span,
478}
479
480impl Token {
481	pub const fn invalid() -> Token {
482		Token {
483			kind: TokenKind::Invalid,
484			span: Span::empty(),
485		}
486	}
487
488	/// Returns if the token is invalid.
489	pub fn is_invalid(&self) -> bool {
490		matches!(self.kind, TokenKind::Invalid)
491	}
492
493	/// Returns if the token is `end of file`.
494	pub fn is_eof(&self) -> bool {
495		matches!(self.kind, TokenKind::Eof)
496	}
497
498	pub fn is_followed_by(&self, other: &Token) -> bool {
499		self.span.is_followed_by(&other.span)
500	}
501
502	pub fn follows_from(&self, other: &Token) -> bool {
503		self.span.follows_from(&other.span)
504	}
505}
506
/// A token which is made up of more complex inner parts.
///
/// Generic over `T`, the type stored in `value`.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub struct CompoundToken<T> {
	/// The value of type `T` carried by the token.
	pub value: T,
	/// The span belonging to the token (presumably covering all of its inner
	/// parts; confirm against the lexer).
	pub span: Span,
}
513
/// A compound token which lexes a JavaScript function body.
///
/// This is a unit struct: it has no fields and derives no traits here.
pub struct JavaScript;