1use self::Error::*;
39use self::Token::*;
40use std::str;
41
42macro_rules! scan_while {
43 ($slf:expr, $start:expr, $first:pat $(| $rest:pat)*) => {{
44 let mut __end = $start;
45
46 loop {
47 if let Some((idx, c)) = $slf.one() {
48 __end = idx;
49
50 match c {
51 $first $(| $rest)* => $slf.step(),
52 _ => break,
53 }
54
55 continue;
56 } else {
57 __end = $slf.input.len();
58 }
59
60 break;
61 }
62
63 __end
64 }}
65}
66
/// A single lexical token produced by the lexer.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Token<'input> {
    /// `=`
    Eq,
    /// `>`
    Gt,
    /// `<`
    Lt,
    /// `<=`
    LtEq,
    /// `>=`
    GtEq,
    /// `^`
    Caret,
    /// `~`
    Tilde,
    /// `*`
    Star,
    /// `.`
    Dot,
    /// `,`
    Comma,
    /// `-`
    Hyphen,
    /// `+`
    Plus,
    /// `||`
    Or,
    /// A run of whitespace (` `, `\t`, `\n`, `\r`). The fields are the offset
    /// where the run starts and the offset where it ends (exclusive) in the
    /// input.
    Whitespace(usize, usize),
    /// A component made only of ASCII digits that parses as a `u64` and has
    /// no leading zero (the single digit `0` is allowed).
    Numeric(u64),
    /// Any other run of ASCII letters and digits, borrowed from the input.
    AlphaNumeric(&'input str),
}
103
104impl<'input> Token<'input> {
105 pub fn is_whitespace(&self) -> bool {
107 match *self {
108 Whitespace(..) => true,
109 _ => false,
110 }
111 }
112
113 pub fn is_wildcard(&self) -> bool {
115 match *self {
116 Star | AlphaNumeric("X") | AlphaNumeric("x") => true,
117 _ => false,
118 }
119 }
120}
121
/// Errors the lexer can report.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Error {
    /// The lexer met a character that does not start any token; the
    /// character is carried in the variant.
    UnexpectedChar(char),
}
127
/// Lexer over a borrowed input string, keeping two characters of lookahead.
#[derive(Debug)]
pub struct Lexer<'input> {
    /// The complete input; token text is sliced out of it.
    input: &'input str,
    /// Characters (paired with their offsets) not yet loaded into the
    /// lookahead slots.
    chars: str::CharIndices<'input>,
    /// The current character and its offset; `None` once the input is used up.
    c1: Option<(usize, char)>,
    /// The character following `c1`, if there is one.
    c2: Option<(usize, char)>,
}
137
138impl<'input> Lexer<'input> {
139 pub fn new(input: &str) -> Lexer {
141 let mut chars = input.char_indices();
142 let c1 = chars.next();
143 let c2 = chars.next();
144
145 Lexer {
146 input,
147 chars,
148 c1,
149 c2,
150 }
151 }
152
153 fn step(&mut self) {
155 self.c1 = self.c2;
156 self.c2 = self.chars.next();
157 }
158
159 fn step_n(&mut self, n: usize) {
160 for _ in 0..n {
161 self.step();
162 }
163 }
164
165 fn one(&mut self) -> Option<(usize, char)> {
167 self.c1
168 }
169
170 fn two(&mut self) -> Option<(usize, char, char)> {
172 self.c1
173 .and_then(|(start, c1)| self.c2.map(|(_, c2)| (start, c1, c2)))
174 }
175
176 fn component(&mut self, start: usize) -> Result<Token<'input>, Error> {
181 let end = scan_while!(self, start, '0'..='9' | 'A'..='Z' | 'a'..='z');
182 let input = &self.input[start..end];
183
184 let mut it = input.chars();
185 let (a, b) = (it.next(), it.next());
186
187 if a == Some('0') && b.is_none() {
189 return Ok(Numeric(0));
190 }
191
192 if a != Some('0') {
193 if let Ok(numeric) = input.parse::<u64>() {
194 return Ok(Numeric(numeric));
195 }
196 }
197
198 Ok(AlphaNumeric(input))
199 }
200
201 fn whitespace(&mut self, start: usize) -> Result<Token<'input>, Error> {
203 let end = scan_while!(self, start, ' ' | '\t' | '\n' | '\r');
204 Ok(Whitespace(start, end))
205 }
206}
207
208impl<'input> Iterator for Lexer<'input> {
209 type Item = Result<Token<'input>, Error>;
210
211 fn next(&mut self) -> Option<Self::Item> {
212 #[allow(clippy::never_loop)]
213 loop {
214 if let Some((_, a, b)) = self.two() {
216 let two = match (a, b) {
217 ('<', '=') => Some(LtEq),
218 ('>', '=') => Some(GtEq),
219 ('|', '|') => Some(Or),
220 _ => None,
221 };
222
223 if let Some(two) = two {
224 self.step_n(2);
225 return Some(Ok(two));
226 }
227 }
228
229 if let Some((start, c)) = self.one() {
231 let tok = match c {
232 ' ' | '\t' | '\n' | '\r' => {
233 self.step();
234 return Some(self.whitespace(start));
235 }
236 '=' => Eq,
237 '>' => Gt,
238 '<' => Lt,
239 '^' => Caret,
240 '~' => Tilde,
241 '*' => Star,
242 '.' => Dot,
243 ',' => Comma,
244 '-' => Hyphen,
245 '+' => Plus,
246 '0'..='9' | 'a'..='z' | 'A'..='Z' => {
247 self.step();
248 return Some(self.component(start));
249 }
250 c => return Some(Err(UnexpectedChar(c))),
251 };
252
253 self.step();
254 return Some(Ok(tok));
255 };
256
257 return None;
258 }
259 }
260}
261
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `input` to completion, panicking on the first lexer error.
    fn lex(input: &str) -> Vec<Token<'_>> {
        Lexer::new(input).map(Result::unwrap).collect()
    }

    #[test]
    pub fn simple_tokens() {
        assert_eq!(
            lex("=><<=>=^~*.,-+||"),
            vec![Eq, Gt, Lt, LtEq, GtEq, Caret, Tilde, Star, Dot, Comma, Hyphen, Plus, Or]
        );
    }

    #[test]
    pub fn whitespace() {
        // Offsets: " \t" is 0..2, "foo" is 2..5, " \t\n\r" is 5..9.
        assert_eq!(
            lex(" \tfoo \t\n\rbar"),
            vec![
                Whitespace(0, 2),
                AlphaNumeric("foo"),
                Whitespace(5, 9),
                AlphaNumeric("bar"),
            ]
        );
    }

    #[test]
    pub fn components() {
        assert_eq!(lex("42"), vec![Numeric(42)]);
        assert_eq!(lex("0"), vec![Numeric(0)]);
        // Leading zeros keep a component textual.
        assert_eq!(lex("01"), vec![AlphaNumeric("01")]);
        assert_eq!(lex("007"), vec![AlphaNumeric("007")]);
        assert_eq!(lex("5885644aa"), vec![AlphaNumeric("5885644aa")]);
        assert_eq!(lex("beta2"), vec![AlphaNumeric("beta2")]);
        assert_eq!(lex("beta.2"), vec![AlphaNumeric("beta"), Dot, Numeric(2)]);
    }

    #[test]
    pub fn is_wildcard() {
        assert!(Star.is_wildcard());
        assert!(AlphaNumeric("x").is_wildcard());
        assert!(AlphaNumeric("X").is_wildcard());
        assert!(!AlphaNumeric("other").is_wildcard());
    }

    #[test]
    pub fn empty() {
        assert!(lex("").is_empty());
    }

    #[test]
    pub fn numeric_all_numbers() {
        let expected: Vec<_> = (0..10).map(Numeric).collect();

        let actual: Vec<_> = lex("0 1 2 3 4 5 6 7 8 9")
            .into_iter()
            .filter(|t| !t.is_whitespace())
            .collect();

        assert_eq!(actual, expected);
    }

    #[test]
    pub fn unexpected_char() {
        assert_eq!(Lexer::new("!").next(), Some(Err(UnexpectedChar('!'))));
        // A lone pipe is not a token; only "||" is.
        assert_eq!(Lexer::new("|").next(), Some(Err(UnexpectedChar('|'))));
    }
}