1mod error;
16mod quoted_strings;
17mod wrapped_parsers;
18
19use std::str::FromStr;
20
21use nom::{
22 bytes::complete::{is_a, take_till, take_while, take_while1},
23 character::complete::{digit1, one_of},
24 combinator::{all_consuming, map, recognize, value},
25 multi::many0,
26 number::complete::double,
27 sequence::{pair, preceded, terminated, tuple},
28 Finish, IResult,
29};
30use nom_locate::LocatedSpan;
31use wrapped_parsers::{alt, tag};
32
33pub use super::token::{KeywordToken, Token, TokenWithLocation};
34use crate::parser::lexer::wrapped_parsers::expecting;
35use crate::parser::token::token_with_location;
36pub(crate) use error::InternalLexError;
37pub use error::{LexError, LexErrorKind};
38
/// A Quil instruction mnemonic (command keyword) recognized by the lexer.
///
/// `strum::Display`/`strum::EnumString` derive the textual round-trip;
/// `serialize_all = "SCREAMING-KEBAB-CASE"` renders multi-word variants with
/// dashes (e.g. `JumpUnless` <-> "JUMP-UNLESS", `RawCapture` <-> "RAW-CAPTURE").
#[derive(Debug, Copy, Clone, PartialEq, Eq, strum::Display, strum::EnumString)]
#[strum(serialize_all = "SCREAMING-KEBAB-CASE")]
pub enum Command {
    Add,
    And,
    Call,
    Capture,
    Convert,
    Declare,
    // The DEF* keywords are single words in Quil, so they override the
    // default kebab-case rendering (which would produce e.g. "DEF-CAL").
    #[strum(to_string = "DEFCAL")]
    DefCal,
    #[strum(to_string = "DEFCIRCUIT")]
    DefCircuit,
    #[strum(to_string = "DEFFRAME")]
    DefFrame,
    #[strum(to_string = "DEFGATE")]
    DefGate,
    #[strum(to_string = "DEFWAVEFORM")]
    DefWaveform,
    Delay,
    Div,
    Eq,
    Exchange,
    Fence,
    GE,
    GT,
    Halt,
    Include,
    Ior,
    Jump,
    JumpUnless,
    JumpWhen,
    Label,
    LE,
    Load,
    LT,
    Measure,
    Move,
    Mul,
    Neg,
    Nop,
    Not,
    Pragma,
    Pulse,
    RawCapture,
    Reset,
    SetFrequency,
    SetPhase,
    SetScale,
    ShiftFrequency,
    ShiftPhase,
    SwapPhases,
    Store,
    Sub,
    Wait,
    Xor,
}
96
/// A Quil memory data type, lexed from its UPPERCASE name (e.g. "BIT", "REAL").
#[derive(Debug, Clone, PartialEq, Eq, strum::Display, strum::EnumString)]
#[strum(serialize_all = "UPPERCASE")]
pub enum DataType {
    Bit,
    Octet,
    Real,
    Integer,
}
105
/// A gate modifier keyword, lexed from its UPPERCASE name
/// (e.g. "CONTROLLED", "DAGGER", "FORKED").
#[derive(Debug, Clone, PartialEq, Eq, strum::Display, strum::EnumString)]
#[strum(serialize_all = "UPPERCASE")]
pub enum Modifier {
    Controlled,
    Dagger,
    Forked,
}
113
/// A single-character arithmetic operator token.
///
/// Only `strum::Display` is derived (no `EnumString`): operators are lexed by
/// their symbol in `lex_operator`, not parsed from a variant name.
#[derive(Debug, Clone, PartialEq, Eq, strum::Display)]
pub enum Operator {
    #[strum(serialize = "^")]
    Caret,
    #[strum(serialize = "-")]
    Minus,
    #[strum(serialize = "+")]
    Plus,
    #[strum(serialize = "/")]
    Slash,
    #[strum(serialize = "*")]
    Star,
}
127
/// Lexer input: a string slice annotated with line/column position info.
pub type LexInput<'a> = LocatedSpan<&'a str>;
/// Internal parse result carrying the crate-private rich error type.
pub(crate) type InternalLexResult<'a, T = Token, E = InternalLexError<'a>> =
    IResult<LexInput<'a>, T, E>;
/// Public parse result type exposed by the lexer.
pub type LexResult<'a, T = Token, E = LexError> = IResult<LexInput<'a>, T, E>;
132
133pub(crate) fn lex(input: LexInput) -> Result<Vec<TokenWithLocation>, LexError> {
135 all_consuming(_lex)(input)
136 .finish()
137 .map(|(_, tokens)| tokens)
138 .map_err(LexError::from)
139}
140
/// Internal lexer loop: repeatedly lex either an indentation token or a token
/// that may be preceded by spaces, then swallow any trailing whitespace.
fn _lex(input: LexInput) -> InternalLexResult<Vec<TokenWithLocation>> {
    terminated(
        many0(alt(
            "indentation or a token preceded by whitespace",
            // `lex_indent` is tried first so indentation at the start of a
            // line is not consumed as ordinary inter-token spacing by the
            // second branch.
            (lex_indent, preceded(many0(tag(" ")), lex_token)),
        )),
        // Discard trailing newlines/tabs/spaces so `all_consuming` in `lex`
        // succeeds on inputs that end with whitespace.
        many0(one_of("\n\t ")),
    )(input)
}
150
151fn lex_indent(input: LexInput) -> InternalLexResult<TokenWithLocation> {
154 alt(
155 "indentation",
156 (
157 token_with_location(value(Token::Indentation, tag(" "))),
158 token_with_location(value(Token::Indentation, tag("\t"))),
159 ),
160 )(input)
161}
162
/// Lex a single token.
///
/// Branch order matters: comments go first so `#` never reaches other
/// parsers, and keywords/identifiers are tried before numbers so words such
/// as "NaN" or "inf" (which a float parser could otherwise consume — see the
/// identifier tests below) lex as identifiers.
fn lex_token(input: LexInput) -> InternalLexResult<TokenWithLocation> {
    alt(
        "a token",
        (
            token_with_location(lex_comment),
            token_with_location(lex_punctuation),
            token_with_location(lex_target),
            token_with_location(lex_string),
            token_with_location(lex_operator),
            token_with_location(lex_variable),
            token_with_location(lex_keyword_or_identifier),
            token_with_location(lex_number),
        ),
    )(input)
}
182
183fn lex_comment(input: LexInput) -> InternalLexResult {
184 let (input, _) = tag("#")(input)?;
185 let (input, content) = take_till(|c| c == '\n')(input)?;
186 Ok((input, Token::Comment(content.to_string())))
187}
188
189fn keyword_or_identifier(identifier: String) -> Token {
190 fn parse<T: FromStr>(token: impl Fn(T) -> Token, identifier: &str) -> Result<Token, T::Err> {
191 T::from_str(identifier).map(token)
192 }
193
194 parse(KeywordToken::into, &identifier)
195 .or_else(|_| parse(Token::Command, &identifier))
196 .or_else(|_| parse(Token::DataType, &identifier))
197 .or_else(|_| parse(Token::Modifier, &identifier))
198 .unwrap_or(Token::Identifier(identifier))
199}
200
/// True when `chr` may begin an identifier: an ASCII letter or an underscore.
fn is_valid_identifier_leading_character(chr: char) -> bool {
    matches!(chr, 'a'..='z' | 'A'..='Z' | '_')
}
204
/// True when `chr` may appear after an identifier's first character: an ASCII
/// letter, an ASCII digit, or an underscore.
fn is_valid_identifier_end_character(chr: char) -> bool {
    chr.is_ascii_alphanumeric() || chr == '_'
}
208
/// True when `chr` is the dash (`-`) that may join identifier segments.
fn is_dash(chr: char) -> bool {
    matches!(chr, '-')
}
212
/// Lex a raw identifier string: a leading ASCII letter or underscore,
/// followed by letters/digits/underscores, optionally continued by
/// dash-joined segments. Each dash run must be followed by at least one
/// identifier character, so a trailing dash is not part of the identifier
/// (e.g. "a-2-%var" lexes the identifier "a-2" and leaves "-%var").
fn lex_identifier_raw(input: LexInput) -> InternalLexResult<String> {
    expecting(
        "a valid identifier",
        map(
            tuple::<_, _, InternalLexError, _>((
                take_while1(is_valid_identifier_leading_character),
                take_while(is_valid_identifier_end_character),
                // Zero or more dash-joined continuations, e.g. "-2_b" in
                // "a-2_b"; `recognize` keeps them as one contiguous span.
                recognize(many0(pair(
                    take_while1(is_dash),
                    take_while1(is_valid_identifier_end_character),
                ))),
            )),
            |(leading, middle, trailing_dash_vars)| {
                format!("{leading}{middle}{trailing_dash_vars}")
            },
        ),
    )(input)
}
231
232fn lex_keyword_or_identifier(input: LexInput) -> InternalLexResult {
233 let (input, identifier) = lex_identifier_raw(input)?;
234 let token = keyword_or_identifier(identifier);
235 Ok((input, token))
236}
237
238fn lex_target(input: LexInput) -> InternalLexResult {
239 let (input, _) = tag("@")(input)?;
240 let (input, label) = lex_identifier_raw(input)?;
241 Ok((input, Token::Target(label)))
242}
243
/// Lex a numeric literal.
///
/// `recognize(double)` first captures the longest text that parses as a
/// float. If that captured text consists entirely of digits it becomes an
/// integer token (a `u64` overflow is escalated to a non-recoverable
/// `nom::Err::Failure`); otherwise (decimal point, exponent, ...) the text is
/// re-parsed as an `f64` float token.
fn lex_number(input: LexInput) -> InternalLexResult {
    let (input, float_string): (LexInput, LexInput) = recognize(double)(input)?;
    // `all_consuming(digit1)` succeeds only when the span is digits-only.
    let integer_parse_result: IResult<LexInput, _> = all_consuming(digit1)(float_string);
    Ok((
        input,
        match integer_parse_result {
            // Digits only: an integer literal.
            Ok(_) => float_string
                .parse::<u64>()
                .map(Token::Integer)
                .map_err(|e| InternalLexError::from_kind(input, e.into()))
                .map_err(nom::Err::Failure)?,
            // Anything else: re-run `double` on the captured span for the value.
            Err(_) => Token::Float(double(float_string)?.1),
        },
    ))
}
259
/// Lex a single-character arithmetic operator (`^`, `-`, `+`, `/`, `*`).
fn lex_operator(input: LexInput) -> InternalLexResult {
    use Operator::*;
    map(
        alt(
            "an operator",
            (
                value(Caret, tag("^")),
                value(Minus, tag("-")),
                value(Plus, tag("+")),
                value(Slash, tag("/")),
                value(Star, tag("*")),
            ),
        ),
        Token::Operator,
    )(input)
}
276
/// Recognize a run of newline characters.
///
/// `is_a` consumes one or more characters drawn from its set: the first
/// branch matches runs of `\n`, and the second also accepts `\r` so that
/// `\r\n` sequences are covered.
fn recognize_newlines(input: LexInput) -> InternalLexResult<LexInput> {
    alt(
        "one or more newlines",
        (
            is_a::<_, _, InternalLexError>("\n"),
            is_a::<_, _, InternalLexError>("\r\n"),
        ),
    )(input)
}
286
287fn lex_punctuation(input: LexInput) -> InternalLexResult {
288 use Token::*;
289 alt(
290 "punctuation",
291 (
292 value(Colon, tag(":")),
293 value(Comma, tag(",")),
294 value(
295 Indentation,
296 alt("four spaces or a tab character", (tag(" "), tag("\t"))),
297 ),
298 value(LBracket, tag("[")),
299 value(LParenthesis, tag("(")),
300 value(NewLine, recognize_newlines),
301 value(RBracket, tag("]")),
302 value(RParenthesis, tag(")")),
303 value(Semicolon, tag(";")),
304 ),
305 )(input)
306}
307
308fn lex_string(input: LexInput) -> InternalLexResult {
309 map(quoted_strings::unescaped_quoted_string, Token::String)(input)
310}
311
312fn lex_variable(input: LexInput) -> InternalLexResult {
313 map(preceded(tag("%"), lex_identifier_raw), |ident| {
314 Token::Variable(ident)
315 })(input)
316}
317
#[cfg(test)]
mod tests {
    use nom_locate::LocatedSpan;
    use rstest::*;

    use crate::parser::{common::tests::KITCHEN_SINK_QUIL, DataType};

    use super::{lex, Command, Operator, Token};

    // Comments run from `#` to end-of-line; an empty comment still produces
    // a token, and the `#` itself is not part of the content.
    #[test]
    fn comment() {
        let input = LocatedSpan::new("# hello\n#world\n#\n#");
        let tokens = lex(input).unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Comment(" hello".to_owned()),
                Token::NewLine,
                Token::Comment("world".to_owned()),
                Token::NewLine,
                Token::Comment("".to_owned()),
                Token::NewLine,
                Token::Comment("".to_owned())
            ]
        )
    }

    // Keyword matching is exact and case-sensitive: "load" and "LOAD-MEMORY"
    // fall back to identifiers even though "LOAD" is a command.
    #[test]
    fn keywords() {
        let input = LocatedSpan::new("DEFGATE DEFCIRCUIT JUMP-WHEN MATRIX LOAD load LOAD-MEMORY");
        let tokens = lex(input).unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Command(Command::DefGate),
                Token::Command(Command::DefCircuit),
                Token::Command(Command::JumpWhen),
                Token::Matrix,
                Token::Command(Command::Load),
                Token::Identifier(String::from("load")),
                Token::Identifier(String::from("LOAD-MEMORY"))
            ]
        )
    }

    // Integers and floats are distinguished; a trailing `i` (imaginary
    // marker) is lexed as a separate identifier, not part of the number.
    #[test]
    fn number() {
        let input = LocatedSpan::new("2 2i 2.0 2e3 2.0e3 (1+2i)");
        let tokens = lex(input).unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Integer(2),
                Token::Integer(2),
                Token::Identifier("i".to_owned()),
                Token::Float(2.0),
                Token::Float(2000f64),
                Token::Float(2000f64),
                Token::LParenthesis,
                Token::Integer(1),
                Token::Operator(Operator::Plus),
                Token::Integer(2),
                Token::Identifier("i".to_owned()),
                Token::RParenthesis
            ]
        )
    }

    // Quoted strings lex to their contents without the surrounding quotes.
    #[test]
    fn string() {
        let input = LocatedSpan::new("\"hello\"\n\"world\"");
        let tokens = lex(input).unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::String("hello".to_owned()),
                Token::NewLine,
                Token::String("world".to_owned())
            ]
        )
    }

    // Single spaces between tokens are separators only — no Indentation
    // tokens appear inside these instructions.
    #[test]
    fn gate_operation() {
        let input = LocatedSpan::new("I 0; RX 1\nCZ 0 1");
        let tokens = lex(input).unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Identifier("I".to_owned()),
                Token::Integer(0),
                Token::Semicolon,
                Token::Identifier("RX".to_owned()),
                Token::Integer(1),
                Token::NewLine,
                Token::Identifier("CZ".to_owned()),
                Token::Integer(0),
                Token::Integer(1),
            ]
        )
    }

    // `@name` lexes to a jump target token.
    #[test]
    fn label() {
        let input = LocatedSpan::new("@hello\n@world");
        let tokens = lex(input).unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Target("hello".to_owned()),
                Token::NewLine,
                Token::Target("world".to_owned())
            ]
        )
    }

    // Leading whitespace at the start of a line lexes as an Indentation token.
    #[test]
    fn indentation() {
        let input = LocatedSpan::new(" ");
        let tokens = lex(input).unwrap();
        assert_eq!(tokens, vec![Token::Indentation,])
    }

    // Indented lines inside a DEFGATE body each produce an Indentation token
    // before their contents.
    #[test]
    fn indented_block() {
        let input = LocatedSpan::new("DEFGATE Name AS PERMUTATION:\n\t1,0\n 0,1");
        let tokens = lex(input).unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Command(Command::DefGate),
                Token::Identifier("Name".to_owned()),
                Token::As,
                Token::Permutation,
                Token::Colon,
                Token::NewLine,
                Token::Indentation,
                Token::Integer(1),
                Token::Comma,
                Token::Integer(0),
                Token::NewLine,
                Token::Indentation,
                Token::Integer(0),
                Token::Comma,
                Token::Integer(1),
            ]
        )
    }

    // Leading newlines are preserved as tokens; trailing whitespace after the
    // final newline is discarded by `_lex`.
    #[test]
    fn surrounding_whitespace() {
        let input = LocatedSpan::new("\nI 0\n \n");
        let tokens = lex(input).unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::NewLine,
                Token::Identifier("I".to_owned()),
                Token::Integer(0),
                Token::NewLine,
                Token::Indentation,
                Token::NewLine
            ]
        )
    }

    // Identifier edge cases: underscores, dash-joined segments, keyword
    // prefixes ("BITS"), and float-like words ("NaN", "inf", "Infinity")
    // which must lex as identifiers, not numbers.
    #[rstest(input, expected,
        case("_", vec![Token::Identifier("_".to_string())]),
        case("a", vec![Token::Identifier("a".to_string())]),
        case("_a-2_b-2_", vec![Token::Identifier("_a-2_b-2_".to_string())]),
        case("a-2-%var", vec![
            Token::Identifier("a-2".to_string()),
            Token::Operator(Operator::Minus),
            Token::Variable("var".to_string())
        ]),
        case("BIT", vec![Token::DataType(DataType::Bit)]),
        case("BITS", vec![Token::Identifier("BITS".to_string())]),
        case("NaN", vec![Token::Identifier("NaN".to_string())]),
        case("nan", vec![Token::Identifier("nan".to_string())]),
        case("NaNa", vec![Token::Identifier("NaNa".to_string())]),
        case("nana", vec![Token::Identifier("nana".to_string())]),
        case("INF", vec![Token::Identifier("INF".to_string())]),
        case("Infinity", vec![Token::Identifier("Infinity".to_string())]),
        case("Inferior", vec![Token::Identifier("Inferior".to_string())]),
        case("-NaN", vec![Token::Operator(Operator::Minus), Token::Identifier("NaN".to_string())]),
        case("-inf", vec![Token::Operator(Operator::Minus), Token::Identifier("inf".to_string())]),
        case("-Infinity", vec![
            Token::Operator(Operator::Minus),
            Token::Identifier("Infinity".to_string())
        ]),
        case("-inferior", vec![
            Token::Operator(Operator::Minus),
            Token::Identifier("inferior".to_string())
        ]),
    )]
    fn it_lexes_identifier(input: &str, expected: Vec<Token>) {
        let input = LocatedSpan::new(input);
        let tokens = lex(input).unwrap();
        assert_eq!(tokens, expected);
    }

    // Negative cases: these inputs must never lex to the given token streams
    // (a trailing dash or backslash is not part of an identifier).
    #[rstest(input, not_expected,
        case("a-", vec![Token::Identifier("_-".to_string())]),
        case("-a", vec![Token::Identifier("-a".to_string())]),
        case("a\\", vec![Token::Identifier("_\\".to_string())]),
    )]
    fn it_fails_to_lex_identifier(input: &str, not_expected: Vec<Token>) {
        let input = LocatedSpan::new(input);
        if let Ok(tokens) = lex(input) {
            assert_ne!(tokens, not_expected);
        }
    }

    // Smoke test: the full kitchen-sink Quil program must lex without error.
    #[test]
    fn kitchen_sink() {
        let input = LocatedSpan::new(KITCHEN_SINK_QUIL);

        lex(input).unwrap();
    }
}