1use std::{fmt, fmt::Write, iter::Peekable, str::CharIndices};
2
/// Result of producing one item: on success the item paired with its
/// location, otherwise an error.
type Spanned<Token, Loc, Error> = Result<(Token, Loc), Error>;
4
/// Builds an `Err(SyntaxError)`.
///
/// With a single argument the value is handed to `SyntaxError::new` as is;
/// with additional arguments the first one is used as a `format!` template.
macro_rules! syntax_err {
    // Ready-made message.
    ($text:expr) => {
        Err(SyntaxError::new($text))
    };
    // Template followed by its formatting arguments.
    ($template:expr, $($args:tt)*) => {
        Err(SyntaxError::new(format!($template, $($args)*)))
    };
}
13
/// Error reported while tokenizing or parsing a source map.
///
/// Wraps a human-readable message, which is also what the `Display`
/// implementation (generated from `#[error("{0}")]`) prints.
#[derive(Debug, Clone, thiserror::Error)]
#[error("{0}")]
pub struct SyntaxError(String);
18
19impl SyntaxError {
20 pub fn new(s: impl Into<String>) -> Self {
21 SyntaxError(s.into())
22 }
23}
24
/// A lexical token of the source map text.
#[derive(PartialEq, Eq)]
enum Token<'a> {
    /// A number, kept as the slice of the input it was read from.
    Number(&'a str),
    /// The `;` character.
    Semicolon,
    /// The `:` character.
    Colon,
    /// The `i` jump marker.
    In,
    /// The `o` jump marker.
    Out,
    /// The `-` jump marker.
    Regular,
}

impl<'a> fmt::Debug for Token<'a> {
    /// Writes an upper-case diagnostic name; numbers include their text.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Token::Number(digits) => return write!(f, "NUMBER({digits:?})"),
            Token::Semicolon => "SEMICOLON",
            Token::Colon => "COLON",
            Token::In => "JMP(i)",
            Token::Out => "JMP(o)",
            Token::Regular => "JMP(-)",
        };
        f.write_str(label)
    }
}

impl<'a> fmt::Display for Token<'a> {
    /// Writes a short description of the token kind (the text of a number is
    /// not included).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let description = match self {
            Token::Number(_) => "number",
            Token::Semicolon => "`;`",
            Token::Colon => "`:`",
            Token::In => "jmp-in",
            Token::Out => "jmp-out",
            Token::Regular => "jmp",
        };
        f.write_str(description)
    }
}
63
/// Tokenizer over a source map string.
struct TokenStream<'input> {
    /// The complete input; number tokens borrow slices of it.
    input: &'input str,
    /// Peekable cursor over the input's `(byte index, char)` pairs.
    chars: Peekable<CharIndices<'input>>,
}
68
69impl<'input> TokenStream<'input> {
70 pub fn new(input: &'input str) -> TokenStream<'input> {
71 TokenStream { chars: input.char_indices().peekable(), input }
72 }
73
74 fn number(
75 &mut self,
76 start: usize,
77 mut end: usize,
78 ) -> Option<Spanned<Token<'input>, usize, SyntaxError>> {
79 loop {
80 if let Some((_, ch)) = self.chars.peek().cloned() {
81 if !ch.is_ascii_digit() {
82 break
83 }
84 self.chars.next();
85 end += 1;
86 } else {
87 end = self.input.len();
88 break
89 }
90 }
91 Some(Ok((Token::Number(&self.input[start..end]), start)))
92 }
93}
94
95impl<'input> Iterator for TokenStream<'input> {
96 type Item = Spanned<Token<'input>, usize, SyntaxError>;
97
98 fn next(&mut self) -> Option<Self::Item> {
99 match self.chars.next()? {
100 (i, ';') => Some(Ok((Token::Semicolon, i))),
101 (i, ':') => Some(Ok((Token::Colon, i))),
102 (i, 'i') => Some(Ok((Token::In, i))),
103 (i, 'o') => Some(Ok((Token::Out, i))),
104 (start, '-') => match self.chars.peek() {
105 Some((_, ch)) if ch.is_ascii_digit() => {
106 self.chars.next();
107 self.number(start, start + 2)
108 }
109 _ => Some(Ok((Token::Regular, start))),
110 },
111 (start, ch) if ch.is_ascii_digit() => self.number(start, start + 1),
112 (i, c) => Some(syntax_err!("Unexpected input {} at {}", c, i)),
113 }
114 }
115}
116
/// The jump marker of a source map element.
///
/// NOTE(review): presumably mirrors the `j` field of the Solidity compiler's
/// source mapping format (into a function / out of a function / regular jump)
/// — confirm against the solc documentation.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum Jump {
    /// Written as `i`.
    In,
    /// Written as `o`.
    Out,
    /// Written as `-`.
    Regular,
}

impl AsRef<str> for Jump {
    /// Returns the one-character textual form of the jump marker.
    fn as_ref(&self) -> &str {
        match *self {
            Jump::Regular => "-",
            Jump::Out => "o",
            Jump::In => "i",
        }
    }
}

impl fmt::Display for Jump {
    /// Writes the same text that `as_ref` returns.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let symbol: &str = self.as_ref();
        f.write_str(symbol)
    }
}
142
/// A parsed source map: the list of its elements in input order.
pub type SourceMap = Vec<SourceElement>;
147
148#[derive(Debug, Clone, PartialEq, Eq, Hash)]
160pub struct SourceElement {
161 pub offset: usize,
163 pub length: usize,
165 pub index: Option<u32>,
172 pub jump: Jump,
174 pub modifier_depth: usize,
177}
178
179impl fmt::Display for SourceElement {
180 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
181 write!(
182 f,
183 "{}:{}:{}:{}:{}",
184 self.offset,
185 self.length,
186 self.index.map(|i| i as i64).unwrap_or(-1),
187 self.jump,
188 self.modifier_depth
189 )
190 }
191}
192
/// Collects the fields that were explicitly present for one source map
/// element; a field left at `None` was not given in the input.
#[derive(Default)]
struct SourceElementBuilder {
    /// Offset, if given.
    pub offset: Option<usize>,
    /// Length, if given.
    pub length: Option<usize>,
    /// Index, if given; the inner `None` stands for an index written as `-1`.
    pub index: Option<Option<u32>>,
    /// Jump marker, if given.
    pub jump: Option<Jump>,
    /// Modifier depth, if given.
    pub modifier_depth: Option<usize>,
}
201
202impl fmt::Display for SourceElementBuilder {
203 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
204 if self.offset.is_none() &&
205 self.length.is_none() &&
206 self.index.is_none() &&
207 self.jump.is_none() &&
208 self.modifier_depth.is_none()
209 {
210 return Ok(())
211 }
212
213 if let Some(s) = self.offset {
214 if self.index == Some(None) {
215 f.write_str("-1")?;
216 } else {
217 s.fmt(f)?;
218 }
219 }
220 if self.length.is_none() &&
221 self.index.is_none() &&
222 self.jump.is_none() &&
223 self.modifier_depth.is_none()
224 {
225 return Ok(())
226 }
227 f.write_char(':')?;
228
229 if let Some(s) = self.length {
230 if self.index == Some(None) {
231 f.write_str("-1")?;
232 } else {
233 s.fmt(f)?;
234 }
235 }
236 if self.index.is_none() && self.jump.is_none() && self.modifier_depth.is_none() {
237 return Ok(())
238 }
239 f.write_char(':')?;
240
241 if let Some(s) = self.index {
242 let s = s.map(|s| s as i64).unwrap_or(-1);
243 s.fmt(f)?;
244 }
245 if self.jump.is_none() && self.modifier_depth.is_none() {
246 return Ok(())
247 }
248 f.write_char(':')?;
249
250 if let Some(s) = self.jump {
251 s.fmt(f)?;
252 }
253 if self.modifier_depth.is_none() {
254 return Ok(())
255 }
256 f.write_char(':')?;
257
258 if let Some(s) = self.modifier_depth {
259 if self.index == Some(None) {
260 f.write_str("-1")?;
261 } else {
262 s.fmt(f)?;
263 }
264 }
265
266 Ok(())
267 }
268}
269
270impl SourceElementBuilder {
271 fn finish(self, prev: Option<SourceElement>) -> Result<SourceElement, SyntaxError> {
272 let element = if let Some(prev) = prev {
273 SourceElement {
274 offset: self.offset.unwrap_or(prev.offset),
275 length: self.length.unwrap_or(prev.length),
276 index: self.index.unwrap_or(prev.index),
277 jump: self.jump.unwrap_or(prev.jump),
278 modifier_depth: self.modifier_depth.unwrap_or(prev.modifier_depth),
279 }
280 } else {
281 SourceElement {
282 offset: self.offset.ok_or_else(|| SyntaxError::new("No previous offset"))?,
283 length: self.length.ok_or_else(|| SyntaxError::new("No previous length"))?,
284 index: self.index.ok_or_else(|| SyntaxError::new("No previous index"))?,
285 jump: self.jump.ok_or_else(|| SyntaxError::new("No previous jump"))?,
286 modifier_depth: self.modifier_depth.unwrap_or_default(),
287 }
288 };
289 Ok(element)
290 }
291
292 fn set_jmp(&mut self, jmp: Jump, i: usize) -> Option<SyntaxError> {
293 if self.jump.is_some() {
294 return Some(SyntaxError::new(format!("Jump already set: {i}")))
295 }
296 self.jump = Some(jmp);
297 None
298 }
299
300 fn set_offset(&mut self, offset: usize, i: usize) -> Option<SyntaxError> {
301 if self.offset.is_some() {
302 return Some(SyntaxError::new(format!("Offset already set: {i}")))
303 }
304 self.offset = Some(offset);
305 None
306 }
307
308 fn set_length(&mut self, length: usize, i: usize) -> Option<SyntaxError> {
309 if self.length.is_some() {
310 return Some(SyntaxError::new(format!("Length already set: {i}")))
311 }
312 self.length = Some(length);
313 None
314 }
315
316 fn set_index(&mut self, index: Option<u32>, i: usize) -> Option<SyntaxError> {
317 if self.index.is_some() {
318 return Some(SyntaxError::new(format!("Index already set: {i}")))
319 }
320 self.index = Some(index);
321 None
322 }
323
324 fn set_modifier(&mut self, modifier_depth: usize, i: usize) -> Option<SyntaxError> {
325 if self.modifier_depth.is_some() {
326 return Some(SyntaxError::new(format!("Modifier depth already set: {i}")))
327 }
328 self.modifier_depth = Some(modifier_depth);
329 None
330 }
331}
332
333pub struct Parser<'input> {
334 stream: TokenStream<'input>,
335 last_element: Option<SourceElement>,
336 done: bool,
337 #[cfg(test)]
338 output: Option<&'input mut dyn Write>,
339}
340
341impl<'input> Parser<'input> {
342 pub fn new(input: &'input str) -> Self {
343 Self {
344 stream: TokenStream::new(input),
345 last_element: None,
346 done: false,
347 #[cfg(test)]
348 output: None,
349 }
350 }
351}
352
/// Parses the text of a number token into an `Option<u32>`.
///
/// `-1` evaluates to `None`, any value in `0..=u32::MAX` to `Some(value)`.
/// For everything else the macro makes the *enclosing function* return
/// `Some(Err(SyntaxError))`, so it can only be used in functions returning
/// `Option<Result<_, SyntaxError>>`:
///
/// * text that is not a valid `i64`,
/// * values below `-1`,
/// * values above `u32::MAX` (these used to be truncated silently by the cast).
///
/// `$pos` is only used in the error messages.
macro_rules! parse_number {
    ($num:expr, $pos:expr) => {{
        let num = match $num.parse::<i64>() {
            Ok(num) => num,
            Err(_) => {
                return Some(syntax_err!(
                    "Expected {} to be a `{}` at {}",
                    $num,
                    // Name the type that was actually attempted; the macro has
                    // no type metavariable to stringify.
                    stringify!(i64),
                    $pos
                ))
            }
        };
        match num {
            i if i < -1 => {
                return Some(syntax_err!("Unexpected negative identifier of `{}` at {}", i, $pos))
            }
            -1 => None,
            i if i > i64::from(u32::MAX) => {
                return Some(syntax_err!("Number `{}` does not fit into `u32` at {}", i, $pos))
            }
            // In range 0..=u32::MAX after the guards above, so the cast is lossless.
            i => Some(i as u32),
        }
    }};
}
375
/// Evaluates something yielding an `Option` of an error and, if it is `Some`,
/// makes the enclosing function return `Some(Err(error))`; otherwise nothing
/// happens.
macro_rules! bail_opt {
    ($opt:stmt) => {
        match { $opt } {
            Some(failure) => return Some(Err(failure)),
            None => {}
        }
    };
}
383
384impl<'input> Iterator for Parser<'input> {
385 type Item = Result<SourceElement, SyntaxError>;
386
387 fn next(&mut self) -> Option<Self::Item> {
388 let mut state = State::Offset;
390 let mut builder = SourceElementBuilder::default();
391
392 loop {
393 match self.stream.next() {
394 Some(Ok((token, pos))) => match token {
395 Token::Semicolon => break,
396 Token::Number(num) => match state {
397 State::Offset => {
398 bail_opt!(builder.set_offset(
399 parse_number!(num, pos).unwrap_or_default() as usize,
400 pos
401 ))
402 }
403 State::Length => {
404 bail_opt!(builder.set_length(
405 parse_number!(num, pos).unwrap_or_default() as usize,
406 pos
407 ))
408 }
409 State::Index => {
410 bail_opt!(builder.set_index(parse_number!(num, pos), pos))
411 }
412 State::Modifier => {
413 bail_opt!(builder.set_modifier(
414 parse_number!(num, pos).unwrap_or_default() as usize,
415 pos
416 ))
417 }
418 State::Jmp => {
419 return Some(syntax_err!("Expected Jump found number at {}", pos))
420 }
421 },
422 Token::Colon => {
423 bail_opt!(state.advance(pos))
424 }
425 Token::In => {
426 bail_opt!(builder.set_jmp(Jump::In, pos))
427 }
428 Token::Out => {
429 bail_opt!(builder.set_jmp(Jump::Out, pos))
430 }
431 Token::Regular => {
432 bail_opt!(builder.set_jmp(Jump::Regular, pos))
433 }
434 },
435 Some(Err(err)) => return Some(Err(err)),
436 None => {
437 if self.done {
438 return None
439 }
440 self.done = true;
441 break
442 }
443 }
444 }
445
446 #[cfg(test)]
447 {
448 if let Some(out) = self.output.as_mut() {
449 if self.last_element.is_some() {
450 let _ = out.write_char(';');
451 }
452 let _ = out.write_str(&builder.to_string());
453 }
454 }
455
456 let element = match builder.finish(self.last_element.take()) {
457 Ok(element) => {
458 self.last_element = Some(element.clone());
459 Ok(element)
460 }
461 Err(err) => Err(err),
462 };
463 Some(element)
464 }
465}
466
467#[derive(Clone, PartialEq, Eq, Copy)]
469enum State {
470 Offset,
472 Length,
474 Index,
476 Jmp,
478 Modifier,
480}
481
482impl State {
483 fn advance(&mut self, i: usize) -> Option<SyntaxError> {
484 match self {
485 State::Offset => *self = State::Length,
486 State::Length => *self = State::Index,
487 State::Index => *self = State::Jmp,
488 State::Jmp => *self = State::Modifier,
489 State::Modifier => return Some(SyntaxError::new(format!("unexpected colon at {i}"))),
490 }
491 None
492 }
493}
494
495pub fn parse(input: &str) -> Result<SourceMap, SyntaxError> {
497 Parser::new(input).collect()
498}
499
#[cfg(test)]
mod tests {
    use super::*;

    /// Debugging helper: runs only the tokenizer over `s`.
    #[allow(unused)]
    fn tokenize(s: &str) -> Vec<Spanned<Token, usize, SyntaxError>> {
        TokenStream::new(s).collect()
    }

    /// Every line of the fixture file must parse without an error.
    #[test]
    fn can_parse_source_maps() {
        let source_maps = include_str!("../test-data/out-source-maps.txt");

        for (line, s) in source_maps.lines().enumerate() {
            parse(s).unwrap_or_else(|_| panic!("Failed to parse line {line}"));
        }
    }

    /// Round trip: the text the parser echoes into `output` while parsing must
    /// equal the fixture it was given.
    #[test]
    fn can_parse_foundry_cheatcodes_sol_maps() {
        let s = include_str!("../test-data/cheatcodes.sol-sourcemap.txt");
        let mut out = String::new();
        let mut parser = Parser::new(s);
        parser.output = Some(&mut out);
        let _map = parser.collect::<Result<SourceMap, _>>().unwrap();
        assert_eq!(out, s);
    }
}