ethers_solc/
sourcemap.rs

1use std::{fmt, fmt::Write, iter::Peekable, str::CharIndices};
2
/// Outcome of lexing one token: the token together with its location in the input, or the error
/// that prevented it from being recognised.
type Spanned<T, L, E> = Result<(T, L), E>;
4
/// Evaluates to `Err(SyntaxError)`.
///
/// Accepts either a single message expression, or a format string followed by its arguments.
macro_rules! syntax_err {
    ($message:expr) => {{
        Err(SyntaxError::new($message))
    }};
    ($format:expr, $($args:tt)*) => {{
        Err(SyntaxError::new(format!($format, $($args)*)))
    }};
}
13
14/// An error that can happen during source map parsing.
15#[derive(Debug, Clone, thiserror::Error)]
16#[error("{0}")]
17pub struct SyntaxError(String);
18
19impl SyntaxError {
20    pub fn new(s: impl Into<String>) -> Self {
21        SyntaxError(s.into())
22    }
23}
24
/// The lexical tokens a source map string is made of.
#[derive(PartialEq, Eq)]
enum Token<'a> {
    /// An integer literal (optionally prefixed with `-`), borrowed from the input
    Number(&'a str),
    /// `;` which terminates an entry
    Semicolon,
    /// `:` which separates the fields of an entry
    Colon,
    /// `i`: a jump that goes into a function
    In,
    /// `o`: a jump that returns from a function
    Out,
    /// `-`: a regular jump
    Regular,
}
37
38impl<'a> fmt::Debug for Token<'a> {
39    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
40        match self {
41            Token::Number(s) => write!(f, "NUMBER({s:?})"),
42            Token::Semicolon => write!(f, "SEMICOLON"),
43            Token::Colon => write!(f, "COLON"),
44            Token::In => write!(f, "JMP(i)"),
45            Token::Out => write!(f, "JMP(o)"),
46            Token::Regular => write!(f, "JMP(-)"),
47        }
48    }
49}
50
51impl<'a> fmt::Display for Token<'a> {
52    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
53        match self {
54            Token::Number(_) => write!(f, "number"),
55            Token::Semicolon => write!(f, "`;`"),
56            Token::Colon => write!(f, "`:`"),
57            Token::In => write!(f, "jmp-in"),
58            Token::Out => write!(f, "jmp-out"),
59            Token::Regular => write!(f, "jmp"),
60        }
61    }
62}
63
/// Lexer that lazily splits a source map string into tokens.
struct TokenStream<'input> {
    /// The complete source map string; number tokens are slices of it
    input: &'input str,
    /// Cursor over the characters of `input`, each paired with its byte offset
    chars: Peekable<CharIndices<'input>>,
}
68
69impl<'input> TokenStream<'input> {
70    pub fn new(input: &'input str) -> TokenStream<'input> {
71        TokenStream { chars: input.char_indices().peekable(), input }
72    }
73
74    fn number(
75        &mut self,
76        start: usize,
77        mut end: usize,
78    ) -> Option<Spanned<Token<'input>, usize, SyntaxError>> {
79        loop {
80            if let Some((_, ch)) = self.chars.peek().cloned() {
81                if !ch.is_ascii_digit() {
82                    break
83                }
84                self.chars.next();
85                end += 1;
86            } else {
87                end = self.input.len();
88                break
89            }
90        }
91        Some(Ok((Token::Number(&self.input[start..end]), start)))
92    }
93}
94
95impl<'input> Iterator for TokenStream<'input> {
96    type Item = Spanned<Token<'input>, usize, SyntaxError>;
97
98    fn next(&mut self) -> Option<Self::Item> {
99        match self.chars.next()? {
100            (i, ';') => Some(Ok((Token::Semicolon, i))),
101            (i, ':') => Some(Ok((Token::Colon, i))),
102            (i, 'i') => Some(Ok((Token::In, i))),
103            (i, 'o') => Some(Ok((Token::Out, i))),
104            (start, '-') => match self.chars.peek() {
105                Some((_, ch)) if ch.is_ascii_digit() => {
106                    self.chars.next();
107                    self.number(start, start + 2)
108                }
109                _ => Some(Ok((Token::Regular, start))),
110            },
111            (start, ch) if ch.is_ascii_digit() => self.number(start, start + 1),
112            (i, c) => Some(syntax_err!("Unexpected input {} at {}", c, i)),
113        }
114    }
115}
116
/// The kind of jump an instruction performs.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum Jump {
    /// A jump instruction that goes into a function
    In,
    /// A jump instruction that returns from a function
    Out,
    /// A regular jump instruction
    Regular,
}
126
127impl AsRef<str> for Jump {
128    fn as_ref(&self) -> &str {
129        match self {
130            Jump::In => "i",
131            Jump::Out => "o",
132            Jump::Regular => "-",
133        }
134    }
135}
136
137impl fmt::Display for Jump {
138    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
139        f.write_str(self.as_ref())
140    }
141}
142
143/// Represents a whole source map as list of `SourceElement`s
144///
145/// See also <https://docs.soliditylang.org/en/latest/internals/source_mappings.html#source-mappings>
146pub type SourceMap = Vec<SourceElement>;
147
148/// Represents a single element in the source map
149/// A solidity source map entry takes the following form
150///
151/// before 0.6.0
152///   s:l:f:j
153///
154/// after 0.6.0
155///   s:l:f:j:m
156///
157/// Where s is the byte-offset to the start of the range in the source file, l is the length of the
158/// source range in bytes and f is the source index.
159#[derive(Debug, Clone, PartialEq, Eq, Hash)]
160pub struct SourceElement {
161    /// The byte-offset to the start of the range in the source file
162    pub offset: usize,
163    /// The length of the source range in bytes
164    pub length: usize,
165    /// the source index
166    ///
167    /// Note: In the case of instructions that are not associated with any particular source file,
168    /// the source mapping assigns an integer identifier of -1. This may happen for bytecode
169    /// sections stemming from compiler-generated inline assembly statements.
170    /// This case is represented as a `None` value
171    pub index: Option<u32>,
172    /// Jump instruction
173    pub jump: Jump,
174    /// “modifier depth”. This depth is increased whenever the placeholder statement (_) is entered
175    /// in a modifier and decreased when it is left again.
176    pub modifier_depth: usize,
177}
178
179impl fmt::Display for SourceElement {
180    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
181        write!(
182            f,
183            "{}:{}:{}:{}:{}",
184            self.offset,
185            self.length,
186            self.index.map(|i| i as i64).unwrap_or(-1),
187            self.jump,
188            self.modifier_depth
189        )
190    }
191}
192
193#[derive(Default)]
194struct SourceElementBuilder {
195    pub offset: Option<usize>,
196    pub length: Option<usize>,
197    pub index: Option<Option<u32>>,
198    pub jump: Option<Jump>,
199    pub modifier_depth: Option<usize>,
200}
201
202impl fmt::Display for SourceElementBuilder {
203    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
204        if self.offset.is_none() &&
205            self.length.is_none() &&
206            self.index.is_none() &&
207            self.jump.is_none() &&
208            self.modifier_depth.is_none()
209        {
210            return Ok(())
211        }
212
213        if let Some(s) = self.offset {
214            if self.index == Some(None) {
215                f.write_str("-1")?;
216            } else {
217                s.fmt(f)?;
218            }
219        }
220        if self.length.is_none() &&
221            self.index.is_none() &&
222            self.jump.is_none() &&
223            self.modifier_depth.is_none()
224        {
225            return Ok(())
226        }
227        f.write_char(':')?;
228
229        if let Some(s) = self.length {
230            if self.index == Some(None) {
231                f.write_str("-1")?;
232            } else {
233                s.fmt(f)?;
234            }
235        }
236        if self.index.is_none() && self.jump.is_none() && self.modifier_depth.is_none() {
237            return Ok(())
238        }
239        f.write_char(':')?;
240
241        if let Some(s) = self.index {
242            let s = s.map(|s| s as i64).unwrap_or(-1);
243            s.fmt(f)?;
244        }
245        if self.jump.is_none() && self.modifier_depth.is_none() {
246            return Ok(())
247        }
248        f.write_char(':')?;
249
250        if let Some(s) = self.jump {
251            s.fmt(f)?;
252        }
253        if self.modifier_depth.is_none() {
254            return Ok(())
255        }
256        f.write_char(':')?;
257
258        if let Some(s) = self.modifier_depth {
259            if self.index == Some(None) {
260                f.write_str("-1")?;
261            } else {
262                s.fmt(f)?;
263            }
264        }
265
266        Ok(())
267    }
268}
269
270impl SourceElementBuilder {
271    fn finish(self, prev: Option<SourceElement>) -> Result<SourceElement, SyntaxError> {
272        let element = if let Some(prev) = prev {
273            SourceElement {
274                offset: self.offset.unwrap_or(prev.offset),
275                length: self.length.unwrap_or(prev.length),
276                index: self.index.unwrap_or(prev.index),
277                jump: self.jump.unwrap_or(prev.jump),
278                modifier_depth: self.modifier_depth.unwrap_or(prev.modifier_depth),
279            }
280        } else {
281            SourceElement {
282                offset: self.offset.ok_or_else(|| SyntaxError::new("No previous offset"))?,
283                length: self.length.ok_or_else(|| SyntaxError::new("No previous length"))?,
284                index: self.index.ok_or_else(|| SyntaxError::new("No previous index"))?,
285                jump: self.jump.ok_or_else(|| SyntaxError::new("No previous jump"))?,
286                modifier_depth: self.modifier_depth.unwrap_or_default(),
287            }
288        };
289        Ok(element)
290    }
291
292    fn set_jmp(&mut self, jmp: Jump, i: usize) -> Option<SyntaxError> {
293        if self.jump.is_some() {
294            return Some(SyntaxError::new(format!("Jump already set: {i}")))
295        }
296        self.jump = Some(jmp);
297        None
298    }
299
300    fn set_offset(&mut self, offset: usize, i: usize) -> Option<SyntaxError> {
301        if self.offset.is_some() {
302            return Some(SyntaxError::new(format!("Offset already set: {i}")))
303        }
304        self.offset = Some(offset);
305        None
306    }
307
308    fn set_length(&mut self, length: usize, i: usize) -> Option<SyntaxError> {
309        if self.length.is_some() {
310            return Some(SyntaxError::new(format!("Length already set: {i}")))
311        }
312        self.length = Some(length);
313        None
314    }
315
316    fn set_index(&mut self, index: Option<u32>, i: usize) -> Option<SyntaxError> {
317        if self.index.is_some() {
318            return Some(SyntaxError::new(format!("Index already set: {i}")))
319        }
320        self.index = Some(index);
321        None
322    }
323
324    fn set_modifier(&mut self, modifier_depth: usize, i: usize) -> Option<SyntaxError> {
325        if self.modifier_depth.is_some() {
326            return Some(SyntaxError::new(format!("Modifier depth already set: {i}")))
327        }
328        self.modifier_depth = Some(modifier_depth);
329        None
330    }
331}
332
333pub struct Parser<'input> {
334    stream: TokenStream<'input>,
335    last_element: Option<SourceElement>,
336    done: bool,
337    #[cfg(test)]
338    output: Option<&'input mut dyn Write>,
339}
340
341impl<'input> Parser<'input> {
342    pub fn new(input: &'input str) -> Self {
343        Self {
344            stream: TokenStream::new(input),
345            last_element: None,
346            done: false,
347            #[cfg(test)]
348            output: None,
349        }
350    }
351}
352
/// Parses the text of a number token into an `Option<u32>`.
///
/// `-1` evaluates to `None`; any other value in `0..=u32::MAX` evaluates to `Some(value)`.
/// Everything else makes the *enclosing function* return `Some(Err(SyntaxError))`: text that does
/// not fit an `i64`, values below `-1`, and values above `u32::MAX` (which a plain `as u32` cast
/// would silently truncate).
///
/// `$num` is the token text and `$pos` its position, which is only used in error messages.
macro_rules! parse_number {
    ($num:expr, $pos:expr) => {{
        let num = match $num.parse::<i64>() {
            Ok(num) => num,
            Err(_) => {
                return Some(syntax_err!("Expected {} to be a valid `i64` at {}", $num, $pos))
            }
        };
        match num {
            -1 => None,
            i if i < -1 => {
                return Some(syntax_err!("Unexpected negative identifier of `{}` at {}", i, $pos))
            }
            // fully qualified so the macro does not depend on `TryFrom` being in scope
            i => match <u32 as ::std::convert::TryFrom<i64>>::try_from(i) {
                Ok(value) => Some(value),
                Err(_) => {
                    return Some(syntax_err!("Number `{}` does not fit into `u32` at {}", i, $pos))
                }
            },
        }
    }};
}
375
/// Evaluates `$opt`, which must yield an `Option` of an error, and makes the enclosing function
/// return `Some(Err(error))` when it is `Some`; otherwise does nothing.
macro_rules! bail_opt {
    ($opt:stmt) => {
        match { $opt } {
            Some(error) => return Some(Err(error)),
            None => {}
        }
    };
}
383
384impl<'input> Iterator for Parser<'input> {
385    type Item = Result<SourceElement, SyntaxError>;
386
387    fn next(&mut self) -> Option<Self::Item> {
388        // start parsing at the offset state, `s`
389        let mut state = State::Offset;
390        let mut builder = SourceElementBuilder::default();
391
392        loop {
393            match self.stream.next() {
394                Some(Ok((token, pos))) => match token {
395                    Token::Semicolon => break,
396                    Token::Number(num) => match state {
397                        State::Offset => {
398                            bail_opt!(builder.set_offset(
399                                parse_number!(num, pos).unwrap_or_default() as usize,
400                                pos
401                            ))
402                        }
403                        State::Length => {
404                            bail_opt!(builder.set_length(
405                                parse_number!(num, pos).unwrap_or_default() as usize,
406                                pos
407                            ))
408                        }
409                        State::Index => {
410                            bail_opt!(builder.set_index(parse_number!(num, pos), pos))
411                        }
412                        State::Modifier => {
413                            bail_opt!(builder.set_modifier(
414                                parse_number!(num, pos).unwrap_or_default() as usize,
415                                pos
416                            ))
417                        }
418                        State::Jmp => {
419                            return Some(syntax_err!("Expected Jump found number at {}", pos))
420                        }
421                    },
422                    Token::Colon => {
423                        bail_opt!(state.advance(pos))
424                    }
425                    Token::In => {
426                        bail_opt!(builder.set_jmp(Jump::In, pos))
427                    }
428                    Token::Out => {
429                        bail_opt!(builder.set_jmp(Jump::Out, pos))
430                    }
431                    Token::Regular => {
432                        bail_opt!(builder.set_jmp(Jump::Regular, pos))
433                    }
434                },
435                Some(Err(err)) => return Some(Err(err)),
436                None => {
437                    if self.done {
438                        return None
439                    }
440                    self.done = true;
441                    break
442                }
443            }
444        }
445
446        #[cfg(test)]
447        {
448            if let Some(out) = self.output.as_mut() {
449                if self.last_element.is_some() {
450                    let _ = out.write_char(';');
451                }
452                let _ = out.write_str(&builder.to_string());
453            }
454        }
455
456        let element = match builder.finish(self.last_element.take()) {
457            Ok(element) => {
458                self.last_element = Some(element.clone());
459                Ok(element)
460            }
461            Err(err) => Err(err),
462        };
463        Some(element)
464    }
465}
466
/// State machine that tracks which field of an entry is currently being parsed; it moves to the
/// next field on every separating `:`.
#[derive(Clone, Copy, PartialEq, Eq)]
enum State {
    /// `s`
    Offset,
    /// `l`
    Length,
    /// `f`
    Index,
    /// `j`
    Jmp,
    /// `m`
    Modifier,
}
481
482impl State {
483    fn advance(&mut self, i: usize) -> Option<SyntaxError> {
484        match self {
485            State::Offset => *self = State::Length,
486            State::Length => *self = State::Index,
487            State::Index => *self = State::Jmp,
488            State::Jmp => *self = State::Modifier,
489            State::Modifier => return Some(SyntaxError::new(format!("unexpected colon at {i}"))),
490        }
491        None
492    }
493}
494
495/// Parses a source map
496pub fn parse(input: &str) -> Result<SourceMap, SyntaxError> {
497    Parser::new(input).collect()
498}
499
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the tokenizer over the whole input and collects every result.
    fn tokenize(s: &str) -> Vec<Spanned<Token, usize, SyntaxError>> {
        TokenStream::new(s).collect()
    }

    #[test]
    fn can_tokenize_entries() {
        let tokens: Vec<_> = tokenize("0:-1:i;-").into_iter().map(|t| t.unwrap()).collect();
        assert_eq!(
            tokens,
            vec![
                (Token::Number("0"), 0),
                (Token::Colon, 1),
                (Token::Number("-1"), 2),
                (Token::Colon, 4),
                (Token::In, 5),
                (Token::Semicolon, 6),
                (Token::Regular, 7),
            ]
        );

        // characters that cannot start a token are reported as errors
        assert!(tokenize("x").pop().unwrap().is_err());
    }

    #[test]
    fn can_parse_source_maps() {
        // all source maps from the compiler output test data
        let source_maps = include_str!("../test-data/out-source-maps.txt");

        for (line, s) in source_maps.lines().enumerate() {
            // include the parser error so a failure is diagnosable from the test output alone
            parse(s).unwrap_or_else(|err| panic!("Failed to parse line {line}: {err}"));
        }
    }

    #[test]
    fn can_parse_foundry_cheatcodes_sol_maps() {
        let s = include_str!("../test-data/cheatcodes.sol-sourcemap.txt");
        let mut out = String::new();
        let mut parser = Parser::new(s);
        // the parser echoes every entry it reads, which must reproduce the input exactly
        parser.output = Some(&mut out);
        let _map = parser.collect::<Result<SourceMap, _>>().unwrap();
        assert_eq!(out, s);
    }
}