semver_parser/
parser.rs

// This module is now only used for parsing versions.
2
3use std::fmt;
4use std::mem;
5
6use self::Error::*;
7use crate::lexer::{self, Lexer, Token};
8use crate::version::{Identifier, Version};
9
10#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
11pub enum Error<'input> {
12    /// Needed more tokens for parsing, but none are available.
13    UnexpectedEnd,
14    /// Unexpected token.
15    UnexpectedToken(Token<'input>),
16    /// An error occurred in the lexer.
17    Lexer(lexer::Error),
18    /// More input available.
19    MoreInput(Vec<Token<'input>>),
20    /// Encountered empty predicate in a set of predicates.
21    EmptyPredicate,
22    /// Encountered an empty range.
23    EmptyRange,
24}
25
26impl<'input> From<lexer::Error> for Error<'input> {
27    fn from(value: lexer::Error) -> Self {
28        Error::Lexer(value)
29    }
30}
31
32impl<'input> fmt::Display for Error<'input> {
33    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
34        use self::Error::*;
35
36        match *self {
37            UnexpectedEnd => write!(fmt, "expected more input"),
38            UnexpectedToken(ref token) => write!(fmt, "encountered unexpected token: {:?}", token),
39            Lexer(ref error) => write!(fmt, "lexer error: {:?}", error),
40            MoreInput(ref tokens) => write!(fmt, "expected end of input, but got: {:?}", tokens),
41            EmptyPredicate => write!(fmt, "encountered empty predicate"),
42            EmptyRange => write!(fmt, "encountered empty range"),
43        }
44    }
45}
46
47/// impl for backwards compatibility.
48impl<'input> From<Error<'input>> for String {
49    fn from(value: Error<'input>) -> Self {
50        value.to_string()
51    }
52}
53
54/// A recursive-descent parser for parsing version requirements.
55pub struct Parser<'input> {
56    /// Source of token.
57    lexer: Lexer<'input>,
58    /// Lookaehead.
59    c1: Option<Token<'input>>,
60}
61
62impl<'input> Parser<'input> {
63    /// Construct a new parser for the given input.
64    pub fn new(input: &'input str) -> Result<Parser<'input>, Error<'input>> {
65        let mut lexer = Lexer::new(input);
66
67        let c1 = if let Some(c1) = lexer.next() {
68            Some(c1?)
69        } else {
70            None
71        };
72
73        Ok(Parser { lexer, c1 })
74    }
75
76    /// Pop one token.
77    #[inline(always)]
78    fn pop(&mut self) -> Result<Token<'input>, Error<'input>> {
79        let c1 = if let Some(c1) = self.lexer.next() {
80            Some(c1?)
81        } else {
82            None
83        };
84
85        mem::replace(&mut self.c1, c1).ok_or_else(|| UnexpectedEnd)
86    }
87
88    /// Peek one token.
89    #[inline(always)]
90    fn peek(&mut self) -> Option<&Token<'input>> {
91        self.c1.as_ref()
92    }
93
94    /// Skip whitespace if present.
95    fn skip_whitespace(&mut self) -> Result<(), Error<'input>> {
96        match self.peek() {
97            Some(&Token::Whitespace(_, _)) => self.pop().map(|_| ()),
98            _ => Ok(()),
99        }
100    }
101
102    /// Parse a single component.
103    ///
104    /// Returns `None` if the component is a wildcard.
105    pub fn component(&mut self) -> Result<Option<u64>, Error<'input>> {
106        match self.pop()? {
107            Token::Numeric(number) => Ok(Some(number)),
108            ref t if t.is_wildcard() => Ok(None),
109            tok => Err(UnexpectedToken(tok)),
110        }
111    }
112
113    /// Parse a single numeric.
114    pub fn numeric(&mut self) -> Result<u64, Error<'input>> {
115        match self.pop()? {
116            Token::Numeric(number) => Ok(number),
117            tok => Err(UnexpectedToken(tok)),
118        }
119    }
120
121    /// Optionally parse a dot, then a component.
122    ///
123    /// The second component of the tuple indicates if a wildcard has been encountered, and is
124    /// always `false` if the first component is `Some`.
125    ///
126    /// If a dot is not encountered, `(None, false)` is returned.
127    ///
128    /// If a wildcard is encountered, `(None, true)` is returned.
129    pub fn dot_component(&mut self) -> Result<(Option<u64>, bool), Error<'input>> {
130        match self.peek() {
131            Some(&Token::Dot) => {}
132            _ => return Ok((None, false)),
133        }
134
135        // pop the peeked dot.
136        self.pop()?;
137        self.component().map(|n| (n, n.is_none()))
138    }
139
140    /// Parse a dot, then a numeric.
141    pub fn dot_numeric(&mut self) -> Result<u64, Error<'input>> {
142        match self.pop()? {
143            Token::Dot => {}
144            tok => return Err(UnexpectedToken(tok)),
145        }
146
147        self.numeric()
148    }
149
150    /// Parse an string identifier.
151    ///
152    /// Like, `foo`, or `bar`, or `beta-1`.
153    pub fn identifier(&mut self) -> Result<Identifier, Error<'input>> {
154        self.bounded_identifier(0)
155    }
156
157    fn bounded_identifier(&mut self, count: u32) -> Result<Identifier, Error<'input>> {
158        if count > 255 {
159            panic!("Cannot have more than 255 identifiers");
160        }
161
162        let identifier = match self.pop()? {
163            Token::AlphaNumeric(identifier) => {
164                // TODO: Borrow?
165                Identifier::AlphaNumeric(identifier.to_string())
166            }
167            Token::Numeric(n) => Identifier::Numeric(n),
168            tok => return Err(UnexpectedToken(tok)),
169        };
170
171        if let Some(&Token::Hyphen) = self.peek() {
172            // pop the peeked hyphen
173            self.pop()?;
174            // concat with any following identifiers
175            Ok(identifier
176                .concat("-")
177                .concat(&self.bounded_identifier(count + 1)?.to_string()))
178        } else {
179            Ok(identifier)
180        }
181    }
182
183    /// Parse all pre-release identifiers, separated by dots.
184    ///
185    /// Like, `abcdef.1234`.
186    fn pre(&mut self) -> Result<Vec<Identifier>, Error<'input>> {
187        match self.peek() {
188            Some(&Token::Hyphen) => {}
189            _ => return Ok(vec![]),
190        }
191
192        // pop the peeked hyphen.
193        self.pop()?;
194        self.parts()
195    }
196
197    /// Parse a dot-separated set of identifiers.
198    fn parts(&mut self) -> Result<Vec<Identifier>, Error<'input>> {
199        let mut parts = Vec::new();
200
201        parts.push(self.identifier()?);
202
203        while let Some(&Token::Dot) = self.peek() {
204            self.pop()?;
205
206            parts.push(self.identifier()?);
207        }
208
209        Ok(parts)
210    }
211
212    /// Parse optional build metadata.
213    ///
214    /// Like, `` (empty), or `+abcdef`.
215    fn plus_build_metadata(&mut self) -> Result<Vec<Identifier>, Error<'input>> {
216        match self.peek() {
217            Some(&Token::Plus) => {}
218            _ => return Ok(vec![]),
219        }
220
221        // pop the plus.
222        self.pop()?;
223        self.parts()
224    }
225
226    /// Parse a version.
227    ///
228    /// Like, `1.0.0` or `3.0.0-beta.1`.
229    pub fn version(&mut self) -> Result<Version, Error<'input>> {
230        self.skip_whitespace()?;
231
232        let major = self.numeric()?;
233        let minor = self.dot_numeric()?;
234        let patch = self.dot_numeric()?;
235        let pre = self.pre()?;
236        let build = self.plus_build_metadata()?;
237
238        self.skip_whitespace()?;
239
240        Ok(Version {
241            major,
242            minor,
243            patch,
244            pre,
245            build,
246        })
247    }
248
249    /// Check if we have reached the end of input.
250    pub fn is_eof(&mut self) -> bool {
251        self.c1.is_none()
252    }
253
254    /// Get the rest of the tokens in the parser.
255    ///
256    /// Useful for debugging.
257    pub fn tail(&mut self) -> Result<Vec<Token<'input>>, Error<'input>> {
258        let mut out = Vec::new();
259
260        if let Some(t) = self.c1.take() {
261            out.push(t);
262        }
263
264        while let Some(t) = self.lexer.next() {
265            out.push(t?);
266        }
267
268        Ok(out)
269    }
270}
271
#[cfg(test)]
mod tests {
    use crate::version::parse;

    /// Feeds the `fuzz-0001` fixture to `parse` and expects the
    /// identifier-limit panic.
    #[test]
    #[should_panic(expected = "Cannot have more than 255 identifiers")]
    fn fuzz_0001() {
        let input = std::fs::read_to_string("tests/fixtures/fuzz-0001.txt")
            .expect("should be able to read version from file");

        // Only the panic matters here; the parse result is discarded.
        let _ = parse(&input);
    }
}
284}