surrealdb_core/syn/lexer/
mod.rs1mod byte;
2mod char;
3pub mod compound;
4mod ident;
5pub mod keywords;
6mod reader;
7mod unicode;
8
9#[cfg(test)]
10mod test;
11
12pub use reader::{BytesReader, CharError};
13
14use crate::syn::{
15 error::{bail, SyntaxError},
16 token::{Span, Token, TokenKind},
17};
18
19#[non_exhaustive]
32pub struct Lexer<'a> {
33 pub(super) reader: BytesReader<'a>,
35 last_offset: u32,
37 scratch: String,
40
41 pub(super) string: Option<String>,
55 pub(super) error: Option<SyntaxError>,
56}
57
58impl<'a> Lexer<'a> {
59 pub fn new(source: &'a [u8]) -> Lexer<'a> {
63 let reader = BytesReader::new(source);
64 assert!(reader.len() <= u32::MAX as usize, "source code exceeded maximum size");
65 Lexer {
66 reader,
67 last_offset: 0,
68 scratch: String::new(),
69 string: None,
70 error: None,
71 }
72 }
73
74 pub fn reset(&mut self) {
78 self.last_offset = 0;
79 self.scratch.clear();
80 self.string = None;
81 self.error = None;
82 }
83
84 pub fn change_source<'b>(self, source: &'b [u8]) -> Lexer<'b> {
91 let reader = BytesReader::<'b>::new(source);
92 assert!(reader.len() <= u32::MAX as usize, "source code exceeded maximum size");
93 Lexer {
94 reader,
95 last_offset: 0,
96 scratch: self.scratch,
97 string: self.string,
98 error: self.error,
99 }
100 }
101
102 pub fn next_token(&mut self) -> Token {
106 let Some(byte) = self.reader.next() else {
107 return self.eof_token();
108 };
109 if byte.is_ascii() {
110 self.lex_ascii(byte)
111 } else {
112 self.lex_char(byte)
113 }
114 }
115
116 fn eof_token(&mut self) -> Token {
121 Token {
122 kind: TokenKind::Eof,
123 span: Span {
124 offset: self.last_offset,
125 len: 0,
126 },
127 }
128 }
129
130 fn invalid_token(&mut self, error: SyntaxError) -> Token {
132 self.error = Some(error);
133 self.finish_token(TokenKind::Invalid)
134 }
135
136 pub(crate) fn current_span(&self) -> Span {
138 let new_offset = self.reader.offset() as u32;
140 let len = new_offset - self.last_offset;
141 Span {
142 offset: self.last_offset,
143 len,
144 }
145 }
146
147 pub(crate) fn span_since(&self, offset: usize) -> Span {
148 let new_offset = self.reader.offset() as u32;
149 let len = new_offset - offset as u32;
150 Span {
151 offset: offset as u32,
152 len,
153 }
154 }
155
156 fn advance_span(&mut self) -> Span {
157 let span = self.current_span();
158 self.last_offset = self.reader.offset() as u32;
159 span
160 }
161
162 fn finish_token(&mut self, kind: TokenKind) -> Token {
166 Token {
167 kind,
168 span: self.advance_span(),
169 }
170 }
171
172 pub(crate) fn backup_before(&mut self, span: Span) {
178 self.reader.backup(span.offset as usize);
179 self.last_offset = span.offset;
180 }
181
182 pub(crate) fn backup_after(&mut self, span: Span) {
188 let offset = span.offset + span.len;
189 self.reader.backup(offset as usize);
190 self.last_offset = offset;
191 }
192
193 fn eat(&mut self, byte: u8) -> bool {
198 if self.reader.peek() == Some(byte) {
199 self.reader.next();
200 true
201 } else {
202 false
203 }
204 }
205
206 fn eat_when<F: FnOnce(u8) -> bool>(&mut self, f: F) -> bool {
211 let Some(x) = self.reader.peek() else {
212 return false;
213 };
214 if f(x) {
215 self.reader.next();
216 true
217 } else {
218 false
219 }
220 }
221
222 fn expect(&mut self, c: char) -> Result<(), SyntaxError> {
223 match self.reader.peek() {
224 Some(x) => {
225 let offset = self.reader.offset() as u32;
226 self.reader.next();
227 let char = self.reader.convert_to_char(x)?;
228 if char == c {
229 return Ok(());
230 }
231 let len = self.reader.offset() as u32 - offset;
232 bail!(
233 "Unexpected character `{char}` expected `{c}`",
234 @Span {
235 offset,
236 len
237 }
238 )
239 }
240 None => {
241 bail!("Unexpected end of file, expected character `{c}`", @self.current_span())
242 }
243 }
244 }
245
246 pub fn span_str(&self, span: Span) -> &'a str {
249 std::str::from_utf8(self.span_bytes(span)).expect("invalid span segment for source")
250 }
251
252 pub fn span_bytes(&self, span: Span) -> &'a [u8] {
255 self.reader.span(span)
256 }
257
258 pub fn assert_finished(&self) -> Result<(), SyntaxError> {
260 if !self.reader.is_empty() {
261 let offset = self.reader.offset() as u32;
262 let len = self.reader.remaining().len() as u32;
263 let span = Span {
264 offset,
265 len,
266 };
267 bail!("Trailing characters", @span)
268 }
269 Ok(())
270 }
271}
272
273impl Iterator for Lexer<'_> {
274 type Item = Token;
275
276 fn next(&mut self) -> Option<Self::Item> {
277 let token = self.next_token();
278 if token.is_eof() {
279 return None;
280 }
281 Some(token)
282 }
283}