souper_ir/
parse.rs

1//! Parsing the Souper IR text format.
2//!
3//! This module provides both high- and low-level parsing utilities:
4//!
5//! * The high-level parsing functions are `parse_*_file` and `parse_*_string`
6//!   which can be used to parse full replacements, LHSes, or RHSes either from
7//!   files on disk or from an in-memory string.
8//!
9//! * The low-level parsing utilities are [the `Parse`
10//!   trait][crate::parse::Parse] and [the `Parser`
11//!   struct][crate::parse::Parser]. These can be used to parse a single
12//!    instance of an LHS or RHS, for example.
13//!
14//! ## Example
15//!
16//! ```
17//! # fn main() -> souper_ir::parse::Result<()> {
18//! use souper_ir::parse::parse_replacements_str;
19//! use std::path::Path;
20//!
21//! let replacements = parse_replacements_str("
22//!     ;; First replacement.
23//!     %x:i32 = var
24//!     %2lx = slt 2, %x
25//!     pc %2lx 1
26//!     %1lx = slt 1, %x
27//!     cand %1lx 1
28//!
29//!     ;; Second replacement.
30//!     %bb = block 3
31//!     %phi1 = phi %bb, 1:i32, 2, 3
32//!     %phi2 = phi %bb, 2:i32, 4, 6
33//!     %phi1x2 = mul %phi1, 2
34//!     cand %phi2 %phi1x2
35//! ", Some(Path::new("example.souper")))?;
36//!
37//! assert_eq!(replacements.len(), 2);
38//! # Ok(())
39//! # }
40//! ```
41
42use crate::ast;
43use id_arena::Arena;
44use std::{
45    borrow::Cow,
46    collections::HashMap,
47    convert::TryInto,
48    iter::Peekable,
49    mem,
50    path::{Path, PathBuf},
51    str::CharIndices,
52    str::FromStr,
53};
54
55/*
56
57Here is the grammar, as far as I can tell. See
58https://github.com/google/souper/issues/782.
59
60```
61<replacement> ::= <lhs> <rhs>
62                | <full>
63
64<lhs> ::= <statement>* <infer>
65
66<infer> ::= 'infer' <valname> <root-attribute>*
67
68<rhs> ::= <statement>* <result>
69
70<result> ::= 'result' <operand>
71
72<full> ::= <lhs> <rhs>
73         | <statement>* <cand>
74
75<cand> ::= 'cand' <valname> <operand> <root-attribute>*
76
77<statement> ::= <assignment>
78              | <pc>
79              | <blockpc>
80
81<assignment> ::= <assignment-lhs> '=' <assignment-rhs>
82
83<assignment-lhs> ::= <valname> (':' <type>)?
84
85<assignment-rhs> ::= 'var' <attribute>*
86                   | 'block' <int>
87                   | 'phi' <valname> (',' <operand>)*
88                   | 'reservedinst' <attribute>*
89                   | 'reservedconst' <attribute>*
90                   | <instruction> <attribute>*
91
92<instruction> ::= 'add' <operand> ',' <operand>
93                | 'addnsw' <operand> ',' <operand>
94                | 'addnuw' <operand> ',' <operand>
95                | 'addnw' <operand> ',' <operand>
96                | 'sub' <operand> ',' <operand>
97                | 'subnsw' <operand> ',' <operand>
98                | 'subnuw' <operand> ',' <operand>
99                | 'subnw' <operand> ',' <operand>
100                | 'mul' <operand> ',' <operand>
101                | 'mulnsw' <operand> ',' <operand>
102                | 'mulnuw' <operand> ',' <operand>
103                | 'mulnw' <operand> ',' <operand>
104                | 'udiv' <operand> ',' <operand>
105                | 'sdiv' <operand> ',' <operand>
106                | 'udivexact' <operand> ',' <operand>
107                | 'sdivexact' <operand> ',' <operand>
108                | 'urem' <operand> ',' <operand>
109                | 'srem' <operand> ',' <operand>
110                | 'and' <operand> ',' <operand>
111                | 'or' <operand> ',' <operand>
112                | 'xor' <operand> ',' <operand>
113                | 'shl' <operand> ',' <operand>
114                | 'shlnsw' <operand> ',' <operand>
115                | 'shlnuw' <operand> ',' <operand>
116                | 'shlnw' <operand> ',' <operand>
117                | 'lshr' <operand> ',' <operand>
118                | 'lshrexact' <operand> ',' <operand>
119                | 'ashr' <operand> ',' <operand>
120                | 'ashrexact' <operand> ',' <operand>
121                | 'select' <operand> ',' <operand> ',' <operand>
122                | 'zext' <operand>
123                | 'sext' <operand>
124                | 'trunc' <operand>
125                | 'eq' <operand> ',' <operand>
126                | 'ne' <operand> ',' <operand>
127                | 'ult' <operand> ',' <operand>
128                | 'slt' <operand> ',' <operand>
129                | 'ule' <operand> ',' <operand>
130                | 'sle' <operand> ',' <operand>
131                | 'ctpop' <operand>
132                | 'bswap' <operand>
133                | 'bitreverse' <operand>
134                | 'cttz' <operand>
135                | 'ctlz' <operand>
136                | 'fshl' <operand> ',' <operand> ',' <operand>
137                | 'fshr' <operand> ',' <operand> ',' <operand>
138                | 'sadd.with.overflow' <operand> ',' <operand>
139                | 'uadd.with.overflow' <operand> ',' <operand>
140                | 'ssub.with.overflow' <operand> ',' <operand>
141                | 'usub.with.overflow' <operand> ',' <operand>
142                | 'smul.with.overflow' <operand> ',' <operand>
143                | 'umul.with.overflow' <operand> ',' <operand>
144                | 'sadd.sat' <operand> ',' <operand>
145                | 'uadd.sat' <operand> ',' <operand>
146                | 'ssub.sat' <operand> ',' <operand>
147                | 'usub.sat' <operand> ',' <operand>
148                | 'extractvalue' <operand> ',' <operand>
149                | 'hole'
150                | 'freeze' <operand>
151
152<operand> ::= <valname>
153            | <constant>
154
155<root-attribute> ::= '(' <root-attribute-inner> ')'
156
157<root-attribute-inner> ::= 'demandedBits' '=' regexp([0|1]+)
158                         | 'harvestedFromUse'
159
160<attribute> ::= '(' <attribute-inner> ')'
161
162<attribute-inner> ::= 'knownBits' '=' regexp([0|1|x]+)
163                    | 'powerOfTwo'
164                    | 'negative'
165                    | 'nonNegative'
166                    | 'nonZero'
167                    | 'hasExternalUses'
168                    | 'signBits' '=' <int>
169                    | 'range' '=' '[' <int> ',' <int> ')'
170
171<pc> ::= 'pc' <operand> <operand>
172
173<blockpc> ::= 'blockpc' <valname> <int> <operand> <operand>
174
175<constant> ::= <int> (':' <type>)?
176
177<int> ::= regexp(-?\d+)
178
179<type> ::= regexp(i\d+)
180
181<valname> ::= regexp(%[a-zA-Z0-9_]+)
182
183<ident> ::= regexp([a-zA-Z][a-zA-Z0-9_]+)
184```
185
186*/
187
188/// An error that occurs during parsing.
189pub struct ParseError {
190    inner: Box<ParseErrorInner>,
191}
192
193impl std::fmt::Debug for ParseError {
194    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
195        write!(f, "{}", self)
196    }
197}
198
199#[derive(Debug)]
200struct ParseErrorInner {
201    kind: ParseErrorKind,
202    pos: Option<(usize, usize, String)>,
203    filepath: Option<PathBuf>,
204}
205
206#[derive(Debug)]
207enum ParseErrorKind {
208    Io(std::io::Error),
209    Parse(usize, String),
210}
211
212impl std::fmt::Display for ParseError {
213    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
214        match &self.inner.kind {
215            ParseErrorKind::Io(io) => write!(f, "IO error: {}", io),
216            ParseErrorKind::Parse(offset, msg) => {
217                let (line, column, line_text) = if let Some((l, c, t)) = &self.inner.pos {
218                    (*l, *c, Cow::Borrowed(t))
219                } else {
220                    (0, 0, Cow::Owned(format!("byte offset {}", offset)))
221                };
222                write!(
223                    f,
224                    "\
225{file}:{line}:{column}: {msg}
226     |
227{line:4} | {line_text}
228     | {marker:>column$}",
229                    file = self
230                        .inner
231                        .filepath
232                        .as_ref()
233                        .map_or(Path::new("unknown"), |p| &*p)
234                        .display(),
235                    line = line + 1,
236                    column = column + 1,
237                    line_text = line_text,
238                    msg = msg,
239                    marker = "^"
240                )
241            }
242        }
243    }
244}
245
246impl std::error::Error for ParseError {}
247
248impl From<std::io::Error> for ParseError {
249    fn from(e: std::io::Error) -> Self {
250        ParseError {
251            inner: Box::new(ParseErrorInner {
252                kind: ParseErrorKind::Io(e),
253                pos: None,
254                filepath: None,
255            }),
256        }
257    }
258}
259
260impl ParseError {
261    pub(crate) fn new(offset: usize, msg: impl Into<String>) -> Self {
262        ParseError {
263            inner: Box::new(ParseErrorInner {
264                kind: ParseErrorKind::Parse(offset, msg.into()),
265                pos: None,
266                filepath: None,
267            }),
268        }
269    }
270
271    fn offset(&self) -> Option<usize> {
272        if let ParseErrorKind::Parse(offset, _) = self.inner.kind {
273            Some(offset)
274        } else {
275            None
276        }
277    }
278
279    fn line_col_in(source: &str, offset: usize) -> Option<(usize, usize)> {
280        let mut current_offset = 0;
281        for (line_num, line) in source.split_terminator('\n').enumerate() {
282            // +1 for the `\n`.
283            current_offset += line.len() + 1;
284
285            if current_offset >= offset {
286                return Some((line_num, line.len().saturating_sub(current_offset - offset)));
287            }
288        }
289
290        None
291    }
292
293    pub(crate) fn set_source(&mut self, source: &str) {
294        if let Some(offset) = self.offset() {
295            if let Some((line, col)) = Self::line_col_in(source, offset) {
296                let line_text = source.lines().nth(line).unwrap_or("").to_string();
297                self.inner.pos = Some((line, col, line_text));
298            }
299        }
300    }
301
302    pub(crate) fn set_filepath(&mut self, filepath: impl Into<PathBuf>) {
303        self.inner.filepath = Some(filepath.into());
304    }
305}
306
307/// A `Result` type for parsing.
308///
309/// Either `Ok(T)` or `Err(ParseError)`.
310pub type Result<T> = std::result::Result<T, ParseError>;
311
312/// Keep parsing `P`s until we reach EOF.
313fn parse_until_eof<P>(parser: &mut Parser) -> Result<Vec<P>>
314where
315    P: Parse,
316{
317    let mut ps = vec![];
318    while !parser.eof()? {
319        ps.push(P::parse(parser)?);
320    }
321    Ok(ps)
322}
323
324/// Execute the given function and if it returns a parse error, set the source
325/// and filename on the error.
326fn with_source_and_file<T>(
327    source: &str,
328    file: Option<&Path>,
329    f: impl FnOnce() -> Result<T>,
330) -> Result<T> {
331    f().map_err(|mut e| {
332        e.set_source(source);
333        if let Some(f) = file {
334            e.set_filepath(f);
335        }
336        e
337    })
338}
339
340/// Parse a sequence of [`Replacement`s][crate::ast::Replacement] from an
341/// in-memory string.
342pub fn parse_replacements_str(
343    source: &str,
344    filename: Option<&Path>,
345) -> Result<Vec<ast::Replacement>> {
346    with_source_and_file(source, filename, || {
347        let mut parser = Parser::new(source);
348        parse_until_eof(&mut parser)
349    })
350}
351
352/// Parse a sequence of [`Replacement`s][crate::ast::Replacement] from a file on
353/// disk.
354pub fn parse_replacements_file(path: &Path) -> Result<Vec<ast::Replacement>> {
355    let source = std::fs::read_to_string(path)?;
356    parse_replacements_str(&source, Some(path))
357}
358
359/// Parse a sequence of [`LeftHandSide`s][crate::ast::LeftHandSide] from an
360/// in-memory string.
361pub fn parse_left_hand_sides_str(
362    source: &str,
363    filename: Option<&Path>,
364) -> Result<Vec<ast::LeftHandSide>> {
365    with_source_and_file(source, filename, || {
366        let mut parser = Parser::new(source);
367        parse_until_eof(&mut parser)
368    })
369}
370
371/// Parse a sequence of [`LeftHandSide`s][crate::ast::LeftHandSide] from a file on
372/// disk.
373pub fn parse_left_hand_sides_file(path: &Path) -> Result<Vec<ast::LeftHandSide>> {
374    let source = std::fs::read_to_string(path)?;
375    parse_left_hand_sides_str(&source, Some(path))
376}
377
378/// Parse a sequence of [`RightHandSide`s][crate::ast::RightHandSide] from an
379/// in-memory string.
380pub fn parse_right_hand_sides_str(
381    source: &str,
382    filename: Option<&Path>,
383) -> Result<Vec<ast::RightHandSide>> {
384    with_source_and_file(source, filename, || {
385        let mut parser = Parser::new(source);
386        parse_until_eof(&mut parser)
387    })
388}
389
390/// Parse a sequence of [`RightHandSide`s][crate::ast::RightHandSide] from a file on
391/// disk.
392pub fn parse_right_hand_sides_file(path: &Path) -> Result<Vec<ast::RightHandSide>> {
393    let source = std::fs::read_to_string(path)?;
394    parse_right_hand_sides_str(&source, Some(path))
395}
396
397#[derive(Debug)]
398struct Lexer<'a> {
399    source: &'a str,
400    chars: Peekable<CharIndices<'a>>,
401}
402
403#[derive(Copy, Clone, Debug, PartialEq, Eq)]
404pub(crate) enum Token<'a> {
405    Ident(&'a str),
406    ValName(&'a str),
407    KnownBits(&'a str),
408    Int(&'a str),
409    Comma,
410    Colon,
411    Eq,
412    OpenParen,
413    CloseParen,
414    OpenBracket,
415}
416
417fn is_ident_char(c: char) -> bool {
418    match c {
419        '0'..='9' | 'a'..='z' | 'A'..='Z' | '_' | '.' => true,
420        _ => false,
421    }
422}
423
424impl<'a> Lexer<'a> {
425    fn new(source: &'a str) -> Self {
426        Lexer {
427            source,
428            chars: source.char_indices().peekable(),
429        }
430    }
431
432    fn expect_char(&mut self, c: char) -> Result<()> {
433        match self.chars.next() {
434            Some((_, c2)) if c2 == c => Ok(()),
435            Some((offset, c2)) => Err(ParseError::new(
436                offset,
437                format!("expected '{}', found '{}'", c, c2),
438            )),
439            None => Err(ParseError::new(
440                self.source.len().saturating_sub(1),
441                "unexpected EOF",
442            )),
443        }
444    }
445
446    /// Get the next token.
447    ///
448    /// Returns `None` at EOF.
449    fn next_token(&mut self) -> Result<Option<(usize, Token<'a>)>> {
450        loop {
451            match self.chars.peek() {
452                // EOF.
453                None => return Ok(None),
454
455                // Eat whitespace.
456                Some((_, c)) if c.is_whitespace() => {
457                    while self.chars.peek().map_or(false, |(_, c)| c.is_whitespace()) {
458                        self.chars.next().unwrap();
459                    }
460                }
461
462                // Eat comments.
463                Some((_, ';')) => {
464                    while self.chars.peek().map_or(false, |(_, c)| *c != '\n') {
465                        self.chars.next().unwrap();
466                    }
467                }
468
469                _ => break,
470            }
471        }
472
473        match *self.chars.peek().unwrap() {
474            (start, ',') => {
475                self.chars.next().unwrap();
476                Ok(Some((start, Token::Comma)))
477            }
478            (start, '=') => {
479                self.chars.next().unwrap();
480                Ok(Some((start, Token::Eq)))
481            }
482            (start, ':') => {
483                self.chars.next().unwrap();
484                Ok(Some((start, Token::Colon)))
485            }
486            (start, '(') => {
487                self.chars.next().unwrap();
488                Ok(Some((start, Token::OpenParen)))
489            }
490            (start, ')') => {
491                self.chars.next().unwrap();
492                Ok(Some((start, Token::CloseParen)))
493            }
494            (start, '[') => {
495                self.chars.next().unwrap();
496                Ok(Some((start, Token::OpenBracket)))
497            }
498            (start, '%') => {
499                self.chars.next().unwrap();
500                let mut end = start + 1;
501                while self.chars.peek().map_or(false, |&(_, c)| is_ident_char(c)) {
502                    let (i, _) = self.chars.next().unwrap();
503                    end = i + 1;
504                }
505                if start + 1 == end {
506                    Err(ParseError::new(start, "expected value name"))
507                } else {
508                    Ok(Some((start, Token::ValName(&self.source[start..end]))))
509                }
510            }
511            (start, c) if ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') => {
512                self.chars.next().unwrap();
513                let mut end = start + 1;
514                while self.chars.peek().map_or(false, |&(_, c)| is_ident_char(c)) {
515                    let (i, _) = self.chars.next().unwrap();
516                    end = i + 1;
517                }
518
519                let ident = &self.source[start..end];
520                if ident != "knownBits" {
521                    return Ok(Some((start, Token::Ident(ident))));
522                }
523
524                self.expect_char('=')?;
525                let bit_pattern_start = start + "knownBits=".len();
526                let mut bit_pattern_end = bit_pattern_start;
527                while self
528                    .chars
529                    .peek()
530                    .map_or(false, |&(_, c)| c == '0' || c == '1' || c == 'x')
531                {
532                    let (i, _) = self.chars.next().unwrap();
533                    bit_pattern_end = i + 1;
534                }
535                if bit_pattern_start == bit_pattern_end {
536                    Err(ParseError::new(
537                        bit_pattern_start,
538                        "expected [0|1|x]+ bit pattern for knownBits",
539                    ))
540                } else {
541                    Ok(Some((
542                        start,
543                        Token::KnownBits(&self.source[bit_pattern_start..bit_pattern_end]),
544                    )))
545                }
546            }
547            (start, c) if c == '-' || ('0' <= c && c <= '9') => {
548                self.chars.next().unwrap();
549                let mut end = start + 1;
550                while self
551                    .chars
552                    .peek()
553                    .map_or(false, |&(_, c)| '0' <= c && c <= '9')
554                {
555                    let (i, _) = self.chars.next().unwrap();
556                    end = i + 1;
557                }
558                Ok(Some((start, Token::Int(&self.source[start..end]))))
559            }
560            (start, c) => Err(ParseError::new(start, format!("unexpected '{}'", c))),
561        }
562    }
563}
564
565/// A Souper text parser buffer.
566///
567/// Manages lexing and lookahead, as well as some parsed values and binding
568/// scopes.
569#[derive(Debug)]
570pub struct Parser<'a> {
571    lexer: Lexer<'a>,
572
573    /// Lookahead token that we've peeked at, if any.
574    lookahead: Option<Token<'a>>,
575
576    /// Our current offset into the string we are parsing.
577    offset: usize,
578
579    /// Statements being built up during parsing.
580    statements: Arena<ast::Statement>,
581
582    /// Scope mapping value names to their value id.
583    values: HashMap<String, ast::ValueId>,
584
585    /// Scope mapping block names to their block id.
586    blocks: HashMap<String, ast::BlockId>,
587}
588
589impl<'a> Parser<'a> {
590    /// Construct a new parser for the given Souper source text.
591    pub fn new(source: &'a str) -> Self {
592        Parser {
593            lexer: Lexer::new(source),
594            lookahead: None,
595            offset: 0,
596            statements: Arena::new(),
597            values: HashMap::new(),
598            blocks: HashMap::new(),
599        }
600    }
601
602    pub(crate) fn lookahead(&mut self) -> Result<Option<Token<'a>>> {
603        if self.lookahead.is_none() {
604            if let Some((offset, token)) = self.lexer.next_token()? {
605                self.offset = offset;
606                self.lookahead = Some(token);
607            }
608        }
609        Ok(self.lookahead)
610    }
611
612    pub(crate) fn lookahead_ident(&mut self, ident: &str) -> Result<bool> {
613        Ok(self.lookahead()? == Some(Token::Ident(ident)))
614    }
615
616    /// Are we at EOF?
617    pub(crate) fn eof(&mut self) -> Result<bool> {
618        Ok(self.lookahead()?.is_none())
619    }
620
621    pub(crate) fn error<T>(&self, msg: impl Into<String>) -> Result<T> {
622        Err(ParseError::new(self.offset, msg))
623    }
624
625    pub(crate) fn token(&mut self) -> Result<Token<'a>> {
626        if let Some(tok) = self.lookahead.take() {
627            Ok(tok)
628        } else {
629            match self.lexer.next_token()? {
630                Some((offset, tok)) => {
631                    self.offset = offset;
632                    Ok(tok)
633                }
634                None => {
635                    self.offset = self.lexer.source.len().saturating_sub(1);
636                    self.error("unexpected EOF")
637                }
638            }
639        }
640    }
641
642    pub(crate) fn val_name(&mut self) -> Result<&'a str> {
643        match self.token()? {
644            Token::ValName(s) => Ok(s),
645            _ => self.error("expected a value name"),
646        }
647    }
648
649    pub(crate) fn ident(&mut self, which: &str) -> Result<()> {
650        match self.token()? {
651            Token::Ident(s) if s == which => Ok(()),
652            _ => self.error(format!("expected '{}'", which)),
653        }
654    }
655
656    pub(crate) fn int(&mut self) -> Result<i128> {
657        match self.token()? {
658            Token::Int(x) => match i128::from_str(x) {
659                Ok(x) => Ok(x),
660                Err(e) => self.error(e.to_string()),
661            },
662            _ => self.error("expected an integer literal"),
663        }
664    }
665
666    pub(crate) fn eq(&mut self) -> Result<()> {
667        match self.token()? {
668            Token::Eq => Ok(()),
669            _ => self.error("expected '='"),
670        }
671    }
672
673    pub(crate) fn colon(&mut self) -> Result<()> {
674        match self.token()? {
675            Token::Colon => Ok(()),
676            _ => self.error("expected ':'"),
677        }
678    }
679
680    pub(crate) fn comma(&mut self) -> Result<()> {
681        match self.token()? {
682            Token::Comma => Ok(()),
683            _ => self.error("expected ','"),
684        }
685    }
686
687    pub(crate) fn open_paren(&mut self) -> Result<()> {
688        match self.token()? {
689            Token::OpenParen => Ok(()),
690            _ => self.error("expected '('"),
691        }
692    }
693
694    pub(crate) fn close_paren(&mut self) -> Result<()> {
695        match self.token()? {
696            Token::CloseParen => Ok(()),
697            _ => self.error("expected ')'"),
698        }
699    }
700
701    pub(crate) fn open_bracket(&mut self) -> Result<()> {
702        match self.token()? {
703            Token::OpenBracket => Ok(()),
704            _ => self.error("expected '['"),
705        }
706    }
707
708    fn take_statements(&mut self) -> Arena<ast::Statement> {
709        self.values.clear();
710        self.blocks.clear();
711        mem::replace(&mut self.statements, Arena::new())
712    }
713}
714
715/// A trait for AST nodes that can be parsed from text.
716pub trait Parse: Sized {
717    /// Parse a `Self` from the given buffer.
718    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self>;
719}
720
721/// A trait for whether an AST node looks like it comes next.
722pub trait Peek {
723    /// Does it look like we can parse a `Self` from the given buffer?
724    fn peek<'a>(parser: &mut Parser<'a>) -> Result<bool>;
725}
726
727impl<P> Parse for Option<P>
728where
729    P: Peek + Parse,
730{
731    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self> {
732        if P::peek(parser)? {
733            Ok(Some(P::parse(parser)?))
734        } else {
735            Ok(None)
736        }
737    }
738}
739
740impl<P> Parse for Vec<P>
741where
742    P: Peek + Parse,
743{
744    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self> {
745        let mut ps = vec![];
746        while P::peek(parser)? {
747            ps.push(P::parse(parser)?);
748        }
749        Ok(ps)
750    }
751}
752
753impl Parse for ast::Replacement {
754    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self> {
755        while ast::Statement::peek(parser)? {
756            parse_statement(parser)?;
757        }
758
759        if ast::Infer::peek(parser)? {
760            let lhs = ast::Infer::parse(parser)?;
761            while ast::Statement::peek(parser)? {
762                parse_statement(parser)?;
763            }
764            parser.ident("result")?;
765            let rhs = ast::Operand::parse(parser)?;
766            let statements = parser.take_statements();
767            return Ok(ast::Replacement::LhsRhs {
768                statements,
769                lhs,
770                rhs,
771            });
772        }
773
774        let cand = ast::Cand::parse(parser)?;
775        let statements = parser.take_statements();
776        Ok(ast::Replacement::Cand { statements, cand })
777    }
778}
779
780impl Parse for ast::LeftHandSide {
781    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self> {
782        while ast::Statement::peek(parser)? {
783            parse_statement(parser)?;
784        }
785        let infer = ast::Infer::parse(parser)?;
786        let statements = parser.take_statements();
787        Ok(ast::LeftHandSide { statements, infer })
788    }
789}
790
791impl Parse for ast::RightHandSide {
792    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self> {
793        while ast::Statement::peek(parser)? {
794            parse_statement(parser)?;
795        }
796        parser.ident("result")?;
797        let result = ast::Operand::parse(parser)?;
798        let statements = parser.take_statements();
799        Ok(ast::RightHandSide { statements, result })
800    }
801}
802
803impl Peek for ast::Statement {
804    fn peek<'a>(parser: &mut Parser<'a>) -> Result<bool> {
805        Ok(ast::Assignment::peek(parser)? || ast::Pc::peek(parser)? || ast::BlockPc::peek(parser)?)
806    }
807}
808
809fn parse_statement(parser: &mut Parser) -> Result<()> {
810    if ast::Assignment::peek(parser)? {
811        let assignment = ast::Assignment::parse(parser)?;
812        let name = assignment.name.clone();
813        let is_block = matches!(assignment.value, ast::AssignmentRhs::Block(_));
814        let id = parser
815            .statements
816            .alloc(ast::Statement::Assignment(assignment));
817        parser.values.insert(name.clone(), ast::ValueId(id));
818        if is_block {
819            parser.blocks.insert(name, ast::BlockId(ast::ValueId(id)));
820        }
821        return Ok(());
822    }
823
824    if ast::Pc::peek(parser)? {
825        let pc = ast::Pc::parse(parser)?;
826        parser.statements.alloc(ast::Statement::Pc(pc));
827        return Ok(());
828    }
829
830    if ast::BlockPc::peek(parser)? {
831        let blockpc = ast::BlockPc::parse(parser)?;
832        parser.statements.alloc(ast::Statement::BlockPc(blockpc));
833        return Ok(());
834    }
835
836    parser.error("expected either an assignment, a pc statement, of a blockpc statement")
837}
838
839impl Peek for ast::Infer {
840    fn peek<'a>(parser: &mut Parser<'a>) -> Result<bool> {
841        parser.lookahead_ident("infer")
842    }
843}
844
845impl Parse for ast::Infer {
846    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self> {
847        parser.ident("infer")?;
848        let name = parser.val_name()?;
849        let value = parser.values.get(name).copied().ok_or_else(|| {
850            ParseError::new(parser.offset, format!("no value named '{}' in scope", name))
851        })?;
852        let attributes = Vec::<ast::RootAttribute>::parse(parser)?;
853        Ok(ast::Infer { value, attributes })
854    }
855}
856
857impl Parse for ast::Cand {
858    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self> {
859        parser.ident("cand")?;
860        let lhs = ast::Operand::parse(parser)?;
861        let rhs = ast::Operand::parse(parser)?;
862        let attributes = Vec::<ast::RootAttribute>::parse(parser)?;
863        Ok(ast::Cand {
864            lhs,
865            rhs,
866            attributes,
867        })
868    }
869}
870
871impl Peek for ast::RootAttribute {
872    fn peek<'a>(parser: &mut Parser<'a>) -> Result<bool> {
873        Ok(parser.lookahead()? == Some(Token::OpenParen))
874    }
875}
876
877impl Parse for ast::RootAttribute {
878    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self> {
879        parser.open_paren()?;
880        match parser.token()? {
881            Token::Ident("harvestedFromUse") => {
882                parser.close_paren()?;
883                Ok(ast::RootAttribute::HarvestedFromUse)
884            }
885            Token::Ident("demandedBits") => {
886                parser.eq()?;
887                match parser.token()? {
888                    Token::Int(i) => {
889                        let mut bits = Vec::with_capacity(i.len());
890                        for ch in i.chars() {
891                            match ch {
892                                '0' => bits.push(false),
893                                '1' => bits.push(true),
894                                _ => return parser.error("expected [0|1]+ bit pattern"),
895                            }
896                        }
897                        parser.close_paren()?;
898                        Ok(ast::RootAttribute::DemandedBits(bits))
899                    }
900                    _ => parser.error("expected [0|1]+ bit pattern"),
901                }
902            }
903            _ => parser.error("expected 'demandedBits' or 'harvestedFromUse'"),
904        }
905    }
906}
907
908impl Peek for ast::Assignment {
909    fn peek<'a>(parser: &mut Parser<'a>) -> Result<bool> {
910        Ok(matches!(parser.lookahead()?, Some(Token::ValName(_))))
911    }
912}
913
914impl Parse for ast::Assignment {
915    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self> {
916        let name = parser.val_name()?.to_string();
917        if parser.values.contains_key(&name) {
918            return parser.error(format!("cannot redefine '{}'", name));
919        }
920
921        let r#type = if parser.lookahead()? == Some(Token::Colon) {
922            parser.colon()?;
923            Some(ast::Type::parse(parser)?)
924        } else {
925            None
926        };
927        parser.eq()?;
928        let value = ast::AssignmentRhs::parse(parser)?;
929        let attributes = Vec::<ast::Attribute>::parse(parser)?;
930        Ok(ast::Assignment {
931            name,
932            r#type,
933            value,
934            attributes,
935        })
936    }
937}
938
939impl Parse for ast::AssignmentRhs {
940    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self> {
941        if parser.lookahead_ident("var")? {
942            parser.ident("var")?;
943            return Ok(ast::AssignmentRhs::Var);
944        }
945
946        if ast::Block::peek(parser)? {
947            let block = ast::Block::parse(parser)?;
948            return Ok(ast::AssignmentRhs::Block(block));
949        }
950
951        if ast::Phi::peek(parser)? {
952            let phi = ast::Phi::parse(parser)?;
953            return Ok(ast::AssignmentRhs::Phi(phi));
954        }
955
956        if parser.lookahead_ident("reservedinst")? {
957            parser.ident("reservedinst")?;
958            return Ok(ast::AssignmentRhs::ReservedInst);
959        }
960
961        if parser.lookahead_ident("reservedconst")? {
962            parser.ident("reservedconst")?;
963            return Ok(ast::AssignmentRhs::ReservedConst);
964        }
965
966        if ast::Instruction::peek(parser)? {
967            let inst = ast::Instruction::parse(parser)?;
968            return Ok(ast::AssignmentRhs::Instruction(inst));
969        }
970
971        parser.error("expected a constant, var, block, phi, or instruction")
972    }
973}
974
975impl Peek for ast::Pc {
976    fn peek<'a>(parser: &mut Parser<'a>) -> Result<bool> {
977        parser.lookahead_ident("pc")
978    }
979}
980
981impl Parse for ast::Pc {
982    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self> {
983        parser.ident("pc")?;
984        let x = ast::Operand::parse(parser)?;
985        let y = ast::Operand::parse(parser)?;
986        Ok(ast::Pc { x, y })
987    }
988}
989
990impl Peek for ast::BlockPc {
991    fn peek<'a>(parser: &mut Parser<'a>) -> Result<bool> {
992        parser.lookahead_ident("blockpc")
993    }
994}
995
996impl Parse for ast::BlockPc {
997    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self> {
998        parser.ident("blockpc")?;
999        let name = parser.val_name()?;
1000        let block =
1001            parser.blocks.get(name).copied().ok_or_else(|| {
1002                ParseError::new(parser.offset, format!("unknown block '{}'", name))
1003            })?;
1004        let predecessor = <i128 as TryInto<u32>>::try_into(parser.int()?)
1005            .map_err(|e| ParseError::new(parser.offset, e.to_string()))?;
1006        let x = ast::Operand::parse(parser)?;
1007        let y = ast::Operand::parse(parser)?;
1008        Ok(ast::BlockPc {
1009            block,
1010            predecessor,
1011            x,
1012            y,
1013        })
1014    }
1015}
1016
1017impl Parse for ast::Type {
1018    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self> {
1019        match parser.token()? {
1020            Token::Ident(ident) if ident.starts_with('i') => match u16::from_str(&ident[1..]) {
1021                Ok(width) if width > 0 => return Ok(ast::Type { width }),
1022                _ => {}
1023            },
1024            _ => {}
1025        }
1026
1027        parser.error("expected a type (like 'i32', 'i8', or 'i1')")
1028    }
1029}
1030
1031impl Peek for ast::Constant {
1032    fn peek<'a>(parser: &mut Parser<'a>) -> Result<bool> {
1033        Ok(matches!(parser.lookahead()?, Some(Token::Int(_))))
1034    }
1035}
1036
1037impl Parse for ast::Constant {
1038    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self> {
1039        let value = parser.int()?;
1040        let r#type = if parser.lookahead()? == Some(Token::Colon) {
1041            parser.colon()?;
1042            Some(ast::Type::parse(parser)?)
1043        } else {
1044            None
1045        };
1046        Ok(ast::Constant { value, r#type })
1047    }
1048}
1049
1050impl Peek for ast::Block {
1051    fn peek<'a>(parser: &mut Parser<'a>) -> Result<bool> {
1052        parser.lookahead_ident("block")
1053    }
1054}
1055
1056impl Parse for ast::Block {
1057    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self> {
1058        parser.ident("block")?;
1059        let predecessors: u32 = <i128 as TryInto<u32>>::try_into(parser.int()?)
1060            .map_err(|e| ParseError::new(parser.offset, e.to_string()))?;
1061        Ok(ast::Block { predecessors })
1062    }
1063}
1064
1065impl Peek for ast::Phi {
1066    fn peek<'a>(parser: &mut Parser<'a>) -> Result<bool> {
1067        parser.lookahead_ident("phi")
1068    }
1069}
1070
1071impl Parse for ast::Phi {
1072    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self> {
1073        parser.ident("phi")?;
1074        let name = parser.val_name()?;
1075        let block =
1076            parser.blocks.get(name).copied().ok_or_else(|| {
1077                ParseError::new(parser.offset, format!("unknown block '{}'", name))
1078            })?;
1079        let predecessors = match parser.statements[(block.0).0] {
1080            ast::Statement::Assignment(ast::Assignment {
1081                value: ast::AssignmentRhs::Block(ast::Block { predecessors }),
1082                ..
1083            }) => predecessors,
1084            _ => unreachable!(),
1085        };
1086        let mut values = vec![];
1087        for _ in 0..predecessors {
1088            parser.comma()?;
1089            values.push(ast::Operand::parse(parser)?);
1090        }
1091        Ok(ast::Phi { block, values })
1092    }
1093}
1094
1095impl Peek for ast::Attribute {
1096    fn peek<'a>(parser: &mut Parser<'a>) -> Result<bool> {
1097        Ok(parser.lookahead()? == Some(Token::OpenParen))
1098    }
1099}
1100
1101impl Parse for ast::Attribute {
1102    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self> {
1103        parser.open_paren()?;
1104        match parser.token()? {
1105            Token::KnownBits(kb) => {
1106                let mut bits = Vec::with_capacity(kb.len());
1107                for b in kb.chars() {
1108                    match b {
1109                        '0' => bits.push(Some(false)),
1110                        '1' => bits.push(Some(true)),
1111                        'x' => bits.push(None),
1112                        _ => unreachable!(),
1113                    }
1114                }
1115                parser.close_paren()?;
1116                Ok(ast::Attribute::KnownBits(bits))
1117            }
1118            Token::Ident("powerOfTwo") => {
1119                parser.close_paren()?;
1120                Ok(ast::Attribute::PowerOfTwo)
1121            }
1122            Token::Ident("negative") => {
1123                parser.close_paren()?;
1124                Ok(ast::Attribute::Negative)
1125            }
1126            Token::Ident("nonNegative") => {
1127                parser.close_paren()?;
1128                Ok(ast::Attribute::NonNegative)
1129            }
1130            Token::Ident("nonZero") => {
1131                parser.close_paren()?;
1132                Ok(ast::Attribute::NonZero)
1133            }
1134            Token::Ident("hasExternalUses") => {
1135                parser.close_paren()?;
1136                Ok(ast::Attribute::HasExternalUses)
1137            }
1138            Token::Ident("signBits") => {
1139                parser.eq()?;
1140                let bits = <i128 as TryInto<u8>>::try_into(parser.int()?)
1141                    .map_err(|e| ParseError::new(parser.offset, e.to_string()))?;
1142                parser.close_paren()?;
1143                Ok(ast::Attribute::SignBits(bits))
1144            }
1145            Token::Ident("range") => {
1146                parser.eq()?;
1147                parser.open_bracket()?;
1148                let min = parser.int()?;
1149                parser.comma()?;
1150                let max = parser.int()?;
1151                parser.close_paren()?;
1152                parser.close_paren()?;
1153                Ok(ast::Attribute::Range(min, max))
1154            }
1155            Token::Ident(id) => parser.error(format!("unknown attribute '{}'", id)),
1156            _ => parser.error("expected an attribute identifier"),
1157        }
1158    }
1159}
1160
1161impl Parse for ast::Operand {
1162    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self> {
1163        if matches!(parser.lookahead()?, Some(Token::ValName(_))) {
1164            let name = parser.val_name()?;
1165            let value = parser.values.get(name).copied().ok_or_else(|| {
1166                ParseError::new(parser.offset, format!("unknown value '{}'", name))
1167            })?;
1168            Ok(ast::Operand::Value(value))
1169        } else {
1170            let constant = ast::Constant::parse(parser)?;
1171            Ok(ast::Operand::Constant(constant))
1172        }
1173    }
1174}
1175
1176#[cfg(test)]
1177mod tests {
1178    use super::*;
1179
1180    #[test]
1181    fn test_parse_error_display() {
1182        let source = "\
1183hello, how are you?
1184> I am good, thanks
1185thats good
1186";
1187        let mut err = ParseError::new(45, "missing apostrophe");
1188        err.set_source(source);
1189        err.set_filepath("path/to/foo.txt");
1190
1191        let expected = "\
1192path/to/foo.txt:3:5: missing apostrophe
1193     |
1194   3 | thats good
1195     |     ^";
1196        let actual = err.to_string();
1197        assert_eq!(expected, actual);
1198    }
1199
1200    #[test]
1201    fn test_lexer_tokens() {
1202        use super::Token::*;
1203
1204        macro_rules! tokenizes {
1205            ( $( $source:expr => [ $($tok:expr),* $(,)* ]; )* ) => {
1206                $({
1207                    eprintln!("=== Lexing {:?} ===", $source);
1208                    let mut lexer = Lexer::new($source);
1209                    $(
1210                        let expected = $tok;
1211                        eprintln!("Expect: {:?}", expected);
1212                        let actual = lexer.next_token()
1213                            .expect("should not have an error during lexing")
1214                            .expect("should not hit EOF")
1215                            .1;
1216                        eprintln!("Actual: {:?}", actual);
1217                        assert_eq!(expected, actual);
1218                    )*
1219                    assert!(lexer.next_token().unwrap().is_none());
1220                })*
1221            }
1222        }
1223
1224        tokenizes! {
1225            "foo foo1 foo_foo FOO" => [
1226                Ident("foo"),
1227                Ident("foo1"),
1228                Ident("foo_foo"),
1229                Ident("FOO"),
1230            ];
1231            "%0 %foo %FOO %foo1" => [
1232                ValName("%0"),
1233                ValName("%foo"),
1234                ValName("%FOO"),
1235                ValName("%foo1"),
1236            ];
1237            "knownBits=0 knownBits=1 knownBits=x knownBits=01x01x01x" => [
1238                KnownBits("0"),
1239                KnownBits("1"),
1240                KnownBits("x"),
1241                KnownBits("01x01x01x"),
1242            ];
1243            ", : = ( ) [" => [Comma, Colon, Eq, OpenParen, CloseParen, OpenBracket];
1244            "1234 -4567" => [Int("1234"), Int("-4567")];
1245            "%0:i8" => [ValName("%0"), Colon, Ident("i8")];
1246            "hello ; blah blah blah\n goodbye" => [Ident("hello"), Ident("goodbye")];
1247        }
1248    }
1249
1250    #[test]
1251    fn test_lexer_offsets() {
1252        macro_rules! offsets {
1253            ( $source:expr => [ $($offset:expr),* $(,)* ]; ) => {
1254                let mut lexer = Lexer::new($source);
1255                $(
1256                    let expected = $offset;
1257                    eprintln!("Expect: {:?}", expected);
1258                    let actual = lexer.next_token()
1259                        .expect("should not have an error during lexing")
1260                        .expect("should not hit EOF")
1261                        .0;
1262                    eprintln!("Actual: {:?}", actual);
1263                    assert_eq!(expected, actual);
1264                )*
1265                assert!(lexer.next_token().unwrap().is_none());
1266            }
1267        }
1268
1269        #[rustfmt::skip]
1270        offsets! {
1271            //         1         2         3         4
1272            //12345678901234567890123456789012345678901234567890
1273            "foo %123 knownBits=01x ,   :   =   (   )   [   42" => [
1274             0,  4,   9,            23, 27, 31, 35, 39, 43, 47
1275            ];
1276        }
1277    }
1278}