1use std::{iter::Rev, str::Chars};
2
3use crate::Range;
4
// Named code points used by the lexer and the classifier functions below.

// Line terminators.
/// U+000A LINE FEED.
pub const C_LINE_FEED: char = '\n';
/// U+000D CARRIAGE RETURN.
pub const C_CARRIAGE_RETURN: char = '\r';
/// U+000C FORM FEED.
pub const C_FORM_FEED: char = '\u{c}';

// Horizontal whitespace.
/// U+0009 CHARACTER TABULATION.
pub const C_TAB: char = '\t';
/// U+0020 SPACE.
pub const C_SPACE: char = ' ';

// Comment and escape punctuation.
/// U+002F SOLIDUS (`/`).
pub const C_SOLIDUS: char = '/';
/// U+005C REVERSE SOLIDUS (`\`).
pub const C_REVERSE_SOLIDUS: char = '\\';
/// U+002A ASTERISK (`*`).
pub const C_ASTERISK: char = '*';

// Brackets.
/// U+0028 LEFT PARENTHESIS (`(`).
pub const C_LEFT_PARENTHESIS: char = '(';
/// U+0029 RIGHT PARENTHESIS (`)`).
pub const C_RIGHT_PARENTHESIS: char = ')';
/// U+007B LEFT CURLY BRACKET (`{`).
pub const C_LEFT_CURLY: char = '{';
/// U+007D RIGHT CURLY BRACKET (`}`).
pub const C_RIGHT_CURLY: char = '}';
/// U+005B LEFT SQUARE BRACKET (`[`).
pub const C_LEFT_SQUARE: char = '[';
/// U+005D RIGHT SQUARE BRACKET (`]`).
pub const C_RIGHT_SQUARE: char = ']';

// String delimiters.
/// U+0022 QUOTATION MARK (`"`).
pub const C_QUOTATION_MARK: char = '"';
/// U+0027 APOSTROPHE (`'`).
pub const C_APOSTROPHE: char = '\'';

// Miscellaneous punctuation.
/// U+002E FULL STOP (`.`).
pub const C_FULL_STOP: char = '.';
/// U+003A COLON (`:`).
pub const C_COLON: char = ':';
/// U+003B SEMICOLON (`;`).
pub const C_SEMICOLON: char = ';';
/// U+002C COMMA (`,`).
pub const C_COMMA: char = ',';
/// U+0025 PERCENT SIGN (`%`).
pub const C_PERCENTAGE: char = '%';
/// U+0040 COMMERCIAL AT (`@`).
pub const C_AT_SIGN: char = '@';

// Letters and digits; the A/F/Z and 0/9 pairs serve as range bounds in the classifiers.
/// U+005F LOW LINE (`_`).
pub const C_LOW_LINE: char = '_';
/// Lowercase `a`.
pub const C_LOWER_A: char = 'a';
/// Lowercase `e`, checked when looking for a number's exponent.
pub const C_LOWER_E: char = 'e';
/// Lowercase `f`, the last lowercase hex digit.
pub const C_LOWER_F: char = 'f';
/// Lowercase `z`.
pub const C_LOWER_Z: char = 'z';
/// Uppercase `A`.
pub const C_UPPER_A: char = 'A';
/// Uppercase `E`, checked when looking for a number's exponent.
pub const C_UPPER_E: char = 'E';
/// Uppercase `F`, the last uppercase hex digit.
pub const C_UPPER_F: char = 'F';
/// Uppercase `Z`.
pub const C_UPPER_Z: char = 'Z';
/// Digit `0`.
pub const C_0: char = '0';
/// Digit `9`.
pub const C_9: char = '9';

// Signs.
/// U+0023 NUMBER SIGN (`#`).
pub const C_NUMBER_SIGN: char = '#';
/// U+002B PLUS SIGN (`+`).
pub const C_PLUS_SIGN: char = '+';
/// U+002D HYPHEN-MINUS (`-`).
pub const C_HYPHEN_MINUS: char = '-';

// Angle brackets, checked when looking for `<!--` and `-->`.
/// U+003C LESS-THAN SIGN (`<`).
pub const C_LESS_THAN_SIGN: char = '<';
/// U+003E GREATER-THAN SIGN (`>`).
pub const C_GREATER_THAN_SIGN: char = '>';
51
/// A position in the lexer input.
///
/// Positions are byte offsets: the lexer advances them by `char::len_utf8` and uses them
/// to index the input `str`.
pub type Pos = u32;
53
/// Receiver for the tokens recognised by [`Lexer::lex`].
///
/// Every callback gets the lexer itself plus the `start..end` positions of the token, which
/// can be turned back into text with [`Lexer::slice`]. Returning `None` from any callback
/// makes the lexer stop: the value is propagated with `?` all the way out of the lex loop.
pub trait Visitor<'s> {
    /// An identifier directly followed by `(`; the range includes the parenthesis.
    ///
    /// Also called for `url(` when its argument starts with a quote, in which case the
    /// quoted argument is reported separately through [`Visitor::string`].
    fn function(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
    /// An identifier that is not followed by `(`.
    fn ident(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
    /// An unquoted `url(...)` token.
    ///
    /// `start..end` spans the whole token including `url(` and `)`, while
    /// `content_start..content_end` spans only the text between the surrounding whitespace.
    fn url(
        &mut self,
        lexer: &mut Lexer<'s>,
        start: Pos,
        end: Pos,
        content_start: Pos,
        content_end: Pos,
    ) -> Option<()>;
    /// A quoted string; the range starts at the opening quote.
    ///
    /// The string is also reported when it is cut short by an unescaped newline, in which
    /// case the range ends before that newline and contains no closing quote.
    fn string(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
    /// Asked right after a `#`, `.` or `:` has been consumed.
    ///
    /// Answering `Some(false)` makes the lexer skip the `id` / `class` / pseudo callback
    /// for that character.
    fn is_selector(&mut self, lexer: &mut Lexer<'s>) -> Option<bool>;
    /// A `#` together with the identifier that follows it, if any.
    fn id(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
    /// A `(` that is not part of a function token.
    fn left_parenthesis(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
    /// A `)`.
    fn right_parenthesis(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
    /// A `,`.
    fn comma(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
    /// A `.` together with the identifier that follows it, if any.
    fn class(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
    /// A `:` followed by an identifier and `(`; the range includes the parenthesis.
    fn pseudo_function(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
    /// A `:` followed by an identifier that is not followed by `(`.
    fn pseudo_class(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
    /// A `;`.
    fn semicolon(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
    /// An `@` followed by an identifier.
    fn at_keyword(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
    /// A `{`.
    fn left_curly_bracket(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
    /// A `}`.
    fn right_curly_bracket(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
}
79
/// Character-level lexer that keeps the current character plus two characters of lookahead.
///
/// `I` is the character source. It defaults to [`Chars`] for forward lexing;
/// [`Lexer::turn_back`] produces a lexer over `Rev<Chars>` that walks a prefix backwards.
#[derive(Debug, Clone)]
pub struct Lexer<'s, I: Iterator<Item = char> = Chars<'s>> {
    /// The text being lexed; [`Lexer::slice`] indexes into it.
    value: &'s str,
    /// Source of the characters that follow `peek2`.
    iter: I,
    /// Position of `cur`; `None` until the first call to `consume`.
    cur_pos: Option<Pos>,
    /// The current character; `None` before the first `consume` and once the input is used up.
    cur: Option<char>,
    /// The character after `cur`.
    peek: Option<char>,
    /// The character after `peek`.
    peek2: Option<char>,
}
89
90impl<'s> Lexer<'s> {
91 pub fn new(value: &'s str) -> Self {
92 let mut iter = value.chars();
93 let peek = iter.next();
94 let peek2 = iter.next();
95 Self {
96 value,
97 iter,
98 cur_pos: None,
99 cur: None,
100 peek,
101 peek2,
102 }
103 }
104
105 pub fn turn_back(self, end: Pos) -> Lexer<'s, Rev<Chars<'s>>> {
106 let value = self.slice(0, end).unwrap();
107 let mut iter = value.chars().rev();
108 let peek = iter.next();
109 let peek2 = iter.next();
110 Lexer {
111 value,
112 iter,
113 cur_pos: None,
114 cur: None,
115 peek,
116 peek2,
117 }
118 }
119
120 pub fn slice(&self, start: Pos, end: Pos) -> Option<&'s str> {
121 Self::slice_range(self.value, &Range::new(start, end))
122 }
123
124 pub fn slice_range<'a>(input: &'a str, range: &Range) -> Option<&'a str> {
125 input.get(range.start as usize..range.end as usize)
126 }
127}
128
129impl<'s, I: Iterator<Item = char>> Lexer<'s, I> {
130 pub fn consume(&mut self) {
131 self.cur_pos = self.peek_pos();
132 self.cur = self.peek;
133 self.peek = self.peek2;
134 self.peek2 = self.iter.next();
135 }
136
137 pub fn cur_pos(&self) -> Option<Pos> {
138 self.cur_pos
139 }
140
141 pub fn cur(&self) -> Option<char> {
142 self.cur
143 }
144
145 pub fn peek_pos(&self) -> Option<Pos> {
146 if let Some(pos) = self.cur_pos() {
147 self.cur().map(|c| pos + c.len_utf8() as u32)
148 } else {
149 Some(0)
150 }
151 }
152
153 pub fn peek(&self) -> Option<char> {
154 self.peek
155 }
156
157 pub fn peek2_pos(&self) -> Option<Pos> {
158 self.peek_pos()
159 .and_then(|pos| self.peek().map(|c| pos + c.len_utf8() as u32))
160 }
161
162 pub fn peek2(&self) -> Option<char> {
163 self.peek2
164 }
165}
166
167impl<'s> Lexer<'s> {
168 pub fn lex<T: Visitor<'s>>(&mut self, visitor: &mut T) {
169 self.lex_impl(visitor);
170 }
171
172 fn lex_impl<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
173 self.consume();
174 while self.cur().is_some() {
175 self.consume_comments()?;
176 match self.cur()? {
178 c if is_white_space(c) => self.consume_space()?,
179 C_QUOTATION_MARK => self.consume_string(visitor, C_QUOTATION_MARK)?,
180 C_NUMBER_SIGN => self.consume_number_sign(visitor)?,
181 C_APOSTROPHE => self.consume_string(visitor, C_APOSTROPHE)?,
182 C_LEFT_PARENTHESIS => self.consume_left_parenthesis(visitor)?,
183 C_RIGHT_PARENTHESIS => self.consume_right_parenthesis(visitor)?,
184 C_PLUS_SIGN => self.consume_plus_sign()?,
185 C_COMMA => self.consume_comma(visitor)?,
186 C_HYPHEN_MINUS => self.consume_minus(visitor)?,
187 C_FULL_STOP => self.consume_full_stop(visitor)?,
188 C_COLON => self.consume_potential_pseudo(visitor)?,
189 C_SEMICOLON => self.consume_semicolon(visitor)?,
190 C_LESS_THAN_SIGN => self.consume_less_than_sign()?,
191 C_AT_SIGN => self.consume_at_sign(visitor)?,
192 C_LEFT_SQUARE => self.consume_delim(),
193 C_REVERSE_SOLIDUS => self.consume_reverse_solidus(visitor)?,
194 C_RIGHT_SQUARE => self.consume_delim(),
195 C_LEFT_CURLY => self.consume_left_curly(visitor)?,
196 C_RIGHT_CURLY => self.consume_right_curly(visitor)?,
197 c if is_digit(c) => self.consume_numeric_token()?,
198 c if is_ident_start(c) => self.consume_ident_like(visitor)?,
199 _ => self.consume_delim(),
200 }
201 }
202 Some(())
203 }
204
205 pub fn consume_delim(&mut self) {
206 self.consume();
207 }
208
209 pub fn consume_numeric_token(&mut self) -> Option<()> {
210 self.consume_number()?;
211 let c = self.cur()?;
212 if start_ident_sequence(c, self.peek()?, self.peek2()?) {
213 return self.consume_ident_sequence();
214 }
215 if c == C_PERCENTAGE {
216 self.consume();
217 }
218 Some(())
219 }
220
221 pub fn consume_number(&mut self) -> Option<()> {
222 self.consume();
223 while is_digit(self.cur()?) {
224 self.consume();
225 }
226 if self.cur()? == C_FULL_STOP && is_digit(self.peek()?) {
227 self.consume();
228 self.consume();
229 while is_digit(self.cur()?) {
230 self.consume();
231 }
232 }
233 let c = self.cur()?;
234 if c == C_LOWER_E || c == C_UPPER_E {
235 let c = self.peek()?;
236 if is_digit(c) {
237 self.consume();
238 } else if c == C_HYPHEN_MINUS || c == C_PLUS_SIGN {
239 let c = self.peek2()?;
240 if is_digit(c) {
241 self.consume();
242 self.consume();
243 } else {
244 return Some(());
245 }
246 } else {
247 return Some(());
248 }
249 } else {
250 return Some(());
251 }
252 self.consume();
253 while is_digit(self.cur()?) {
254 self.consume();
255 }
256 Some(())
257 }
258
259 pub fn consume_ident_sequence(&mut self) -> Option<()> {
260 loop {
261 let c = self.cur()?;
262 if maybe_valid_escape(c) {
263 self.consume();
264 self.consume_escaped()?;
265 } else if is_ident(c) {
266 self.consume();
267 } else {
268 return Some(());
269 }
270 }
271 }
272
273 pub fn consume_escaped(&mut self) -> Option<()> {
274 if is_hex_digit(self.cur()?) {
275 for _ in 1..5 {
276 self.consume();
277 if !is_hex_digit(self.cur()?) {
278 break;
279 }
280 }
281 if is_white_space(self.cur()?) {
282 self.consume();
283 }
284 } else {
285 self.consume();
286 }
287 Some(())
288 }
289
290 pub fn consume_ident_like<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
291 let start = self.cur_pos()?;
292 self.consume_ident_sequence()?;
293 let peek_pos = self.peek_pos()?;
294 if self.cur_pos()? == start + 3 && self.slice(start, peek_pos)?.eq_ignore_ascii_case("url(")
295 {
296 self.consume();
297 while is_white_space(self.cur()?) {
298 self.consume();
299 }
300 let c = self.cur()?;
301 if c == C_QUOTATION_MARK || c == C_APOSTROPHE {
302 visitor.function(self, start, peek_pos)
303 } else {
304 self.consume_url(visitor, start)
305 }
306 } else if self.cur()? == C_LEFT_PARENTHESIS {
307 self.consume();
308 visitor.function(self, start, self.cur_pos()?)
309 } else {
310 visitor.ident(self, start, self.cur_pos()?)
311 }
312 }
313
314 pub fn consume_url<T: Visitor<'s>>(
315 self: &mut Lexer<'s>,
316 visitor: &mut T,
317 start: Pos,
318 ) -> Option<()> {
319 let content_start = self.cur_pos()?;
320 loop {
321 let c = self.cur()?;
322 if maybe_valid_escape(c) {
323 self.consume();
324 self.consume_escaped()?;
325 } else if is_white_space(c) {
326 let content_end = self.cur_pos()?;
327 self.consume();
328 while is_white_space(self.cur()?) {
329 self.consume();
330 }
331 if self.cur()? != C_RIGHT_PARENTHESIS {
332 return Some(());
333 }
334 self.consume();
335 return visitor.url(self, start, self.cur_pos()?, content_start, content_end);
336 } else if c == C_RIGHT_PARENTHESIS {
337 let content_end = self.cur_pos()?;
338 self.consume();
339 return visitor.url(self, start, self.cur_pos()?, content_start, content_end);
340 } else if c == C_LEFT_PARENTHESIS {
341 return Some(());
342 } else {
343 self.consume();
344 }
345 }
346 }
347
348 pub fn consume_string<T: Visitor<'s>>(&mut self, visitor: &mut T, end: char) -> Option<()> {
349 let start = self.cur_pos()?;
350 self.consume();
351 loop {
352 let c = self.cur()?;
353 if c == end {
354 self.consume();
355 break;
356 } else if is_new_line(c) {
357 break;
358 } else if c == C_REVERSE_SOLIDUS {
359 self.consume();
360 let c2 = self.cur()?;
361 if is_new_line(c2) {
362 self.consume();
363 } else if are_valid_escape(c, c2) {
364 self.consume_escaped()?;
365 }
366 } else {
367 self.consume();
368 }
369 }
370 visitor.string(self, start, self.cur_pos()?)
371 }
372
373 pub fn consume_number_sign<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
374 let c2 = self.peek()?;
375 let start = self.cur_pos()?;
376 if is_ident(c2) || are_valid_escape(c2, self.peek2()?) {
377 self.consume();
378 if !visitor.is_selector(self)? {
379 return Some(());
380 }
381 if !start_ident_sequence(self.cur()?, self.peek()?, self.peek2()?) {
382 return visitor.id(self, start, self.cur_pos()?);
383 }
384 self.consume_ident_sequence()?;
385 visitor.id(self, start, self.cur_pos()?)
386 } else {
387 self.consume_delim();
388 visitor.id(self, start, self.cur_pos()?)
389 }
390 }
391
392 pub fn consume_left_parenthesis<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
393 self.consume();
394 let end = self.cur_pos()?;
395 visitor.left_parenthesis(self, end - 1, end)
396 }
397
398 pub fn consume_right_parenthesis<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
399 self.consume();
400 let end = self.cur_pos()?;
401 visitor.right_parenthesis(self, end - 1, end)
402 }
403
404 pub fn consume_plus_sign(&mut self) -> Option<()> {
405 if start_number(self.cur()?, self.peek()?, self.peek2()?) {
406 self.consume_numeric_token()?;
407 } else {
408 self.consume_delim();
409 }
410 Some(())
411 }
412
413 pub fn consume_comma<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
414 self.consume();
415 let end = self.cur_pos()?;
416 visitor.comma(self, end - 1, end)
417 }
418
419 pub fn consume_minus<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
420 let c = self.cur()?;
421 let c2 = self.peek()?;
422 let c3 = self.peek2()?;
423 if start_number(c, c2, c3) {
424 self.consume_numeric_token()?;
425 } else if c2 == C_HYPHEN_MINUS && c3 == C_GREATER_THAN_SIGN {
426 self.consume();
427 self.consume();
428 } else if start_ident_sequence(c, c2, c3) {
429 self.consume_ident_like(visitor)?;
430 } else {
431 self.consume_delim();
432 }
433 Some(())
434 }
435
436 pub fn consume_full_stop<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
437 let c = self.cur()?;
438 let c2 = self.peek()?;
439 let c3 = self.peek2()?;
440 if start_number(c, c2, c3) {
441 return self.consume_numeric_token();
442 }
443 let start = self.cur_pos()?;
444 self.consume();
445 if !visitor.is_selector(self)? {
446 return Some(());
447 }
448 if !start_ident_sequence(c2, c3, self.peek2()?) {
449 return visitor.class(self, start, self.cur_pos()?);
450 }
451 self.consume_ident_sequence()?;
452 visitor.class(self, start, self.cur_pos()?)
453 }
454
455 pub fn consume_potential_pseudo<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
456 let start = self.cur_pos()?;
457 self.consume();
458 if !visitor.is_selector(self)?
459 || !start_ident_sequence(self.cur()?, self.peek()?, self.peek2()?)
460 {
461 return Some(());
462 }
463 self.consume_ident_sequence()?;
464 if self.cur()? == C_LEFT_PARENTHESIS {
465 self.consume();
466 visitor.pseudo_function(self, start, self.cur_pos()?)
467 } else {
468 visitor.pseudo_class(self, start, self.cur_pos()?)
469 }
470 }
471
472 pub fn consume_semicolon<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
473 self.consume();
474 let end = self.cur_pos()?;
475 visitor.semicolon(self, end - 1, end)
476 }
477
478 pub fn consume_less_than_sign(&mut self) -> Option<()> {
479 self.consume();
480 if self.cur()? == '!' && self.peek()? == '-' && self.peek2()? == '-' {
481 self.consume();
482 self.consume();
483 self.consume();
484 }
485 Some(())
486 }
487
488 pub fn consume_at_sign<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
489 let start = self.cur_pos()?;
490 self.consume();
491 if start_ident_sequence(self.cur()?, self.peek()?, self.peek2()?) {
492 self.consume_ident_sequence()?;
493 return visitor.at_keyword(self, start, self.cur_pos()?);
494 }
495 Some(())
496 }
497
498 pub fn consume_reverse_solidus<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
499 if are_valid_escape(self.cur()?, self.peek()?) {
500 self.consume_ident_like(visitor)?;
501 } else {
502 self.consume_delim();
503 }
504 Some(())
505 }
506
507 pub fn consume_left_curly<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
508 self.consume();
509 let end = self.cur_pos()?;
510 visitor.left_curly_bracket(self, end - 1, end)
511 }
512
513 pub fn consume_right_curly<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
514 self.consume();
515 let end = self.cur_pos()?;
516 visitor.right_curly_bracket(self, end - 1, end)
517 }
518}
519
520impl<'s, I: Iterator<Item = char>> Lexer<'s, I> {
521 pub fn consume_comments(&mut self) -> Option<()> {
522 if self.cur()? == C_SOLIDUS && self.peek()? == C_ASTERISK {
523 self.consume();
524 loop {
525 self.consume();
526 let c = self.cur()?;
527 if c == C_ASTERISK && self.peek()? == C_SOLIDUS {
528 self.consume();
529 self.consume();
530 break;
531 }
532 }
533 }
534 Some(())
535 }
536
537 pub fn consume_space(&mut self) -> Option<()> {
538 self.consume();
539 while is_white_space(self.cur()?) {
540 self.consume();
541 }
542 Some(())
543 }
544
545 pub fn consume_white_space_and_comments(&mut self) -> Option<()> {
546 loop {
547 self.consume_comments()?;
548 if is_white_space(self.cur()?) {
549 self.consume_space()?;
550 } else {
551 break;
552 }
553 }
554 Some(())
555 }
556}
557
558pub fn is_new_line(c: char) -> bool {
559 c == C_LINE_FEED || c == C_CARRIAGE_RETURN || c == C_FORM_FEED
560}
561
562pub fn is_space(c: char) -> bool {
563 c == C_TAB || c == C_SPACE
564}
565
566pub fn is_white_space(c: char) -> bool {
567 is_new_line(c) || is_space(c)
568}
569
570pub fn is_digit(c: char) -> bool {
571 c >= C_0 && c <= C_9
572}
573
574pub fn is_hex_digit(c: char) -> bool {
575 is_digit(c) || (c >= C_UPPER_A && c <= C_UPPER_F) || (c >= C_LOWER_A && c <= C_LOWER_F)
576}
577
578pub fn is_ident_start(c: char) -> bool {
579 c == C_LOW_LINE
580 || (c >= C_LOWER_A && c <= C_LOWER_Z)
581 || (c >= C_UPPER_A && c <= C_UPPER_Z)
582 || c > '\u{80}'
583}
584
585pub fn is_ident(c: char) -> bool {
586 is_ident_start(c) || is_digit(c) || c == C_HYPHEN_MINUS
587}
588
589pub fn start_ident_sequence(c1: char, c2: char, c3: char) -> bool {
590 if c1 == C_HYPHEN_MINUS {
591 is_ident_start(c2) || c2 == C_HYPHEN_MINUS || are_valid_escape(c2, c3)
592 } else {
593 is_ident_start(c1) || are_valid_escape(c1, c2)
594 }
595}
596
/// Returns `true` when `c` is a backslash, i.e. when it could begin an escape; the
/// character after it is not inspected.
pub fn maybe_valid_escape(c: char) -> bool {
    matches!(c, C_REVERSE_SOLIDUS)
}
600
601pub fn are_valid_escape(c1: char, c2: char) -> bool {
602 c1 == C_REVERSE_SOLIDUS && !is_new_line(c2)
603}
604
605pub fn start_number(c1: char, c2: char, c3: char) -> bool {
606 if c1 == C_PLUS_SIGN || c1 == C_HYPHEN_MINUS {
607 is_digit(c2) || (c2 == C_FULL_STOP && is_digit(c3))
608 } else {
609 is_digit(c1) || (c1 == C_FULL_STOP && is_digit(c2))
610 }
611}
612
#[cfg(test)]
mod tests {
    use super::*;
    use indoc::indoc;

    /// Asserts all six observable cursor values of `lexer` in one call.
    fn assert_lexer_state<I: Iterator<Item = char>>(
        lexer: &Lexer<'_, I>,
        cur: Option<char>,
        cur_pos: Option<Pos>,
        peek: Option<char>,
        peek_pos: Option<Pos>,
        peek2: Option<char>,
        peek2_pos: Option<Pos>,
    ) {
        assert_eq!(lexer.cur(), cur);
        assert_eq!(lexer.cur_pos(), cur_pos);
        assert_eq!(lexer.peek(), peek);
        assert_eq!(lexer.peek_pos(), peek_pos);
        assert_eq!(lexer.peek2(), peek2);
        assert_eq!(lexer.peek2_pos(), peek2_pos);
    }

    /// Visitor that records every callback as a `(token kind, source text)` pair.
    #[derive(Default)]
    struct Snapshot {
        results: Vec<(String, String)>,
    }

    impl Snapshot {
        /// Appends one recorded token.
        pub fn add(&mut self, key: &str, value: &str) {
            self.results.push((key.to_string(), value.to_string()))
        }

        /// Renders the recorded tokens as `kind: text` lines, one per token.
        pub fn snapshot(&self) -> String {
            self.results
                .iter()
                .map(|(k, v)| format!("{k}: {v}\n"))
                .collect::<String>()
        }
    }

    // Each callback records the slice for the reported range; a range that cannot be
    // sliced makes the callback return `None`, which stops the lexer.
    impl Visitor<'_> for Snapshot {
        fn function(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.add("function", lexer.slice(start, end)?);
            Some(())
        }

        fn ident(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.add("ident", lexer.slice(start, end)?);
            Some(())
        }

        // Only the URL content is recorded, not the surrounding `url(` and `)`.
        fn url(
            &mut self,
            lexer: &mut Lexer,
            _: Pos,
            _: Pos,
            content_start: Pos,
            content_end: Pos,
        ) -> Option<()> {
            self.add("url", lexer.slice(content_start, content_end)?);
            Some(())
        }

        fn string(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.add("string", lexer.slice(start, end)?);
            Some(())
        }

        // Everything is treated as selector context, so `#`, `.` and `:` are always
        // reported, even inside declarations.
        fn is_selector(&mut self, _: &mut Lexer) -> Option<bool> {
            Some(true)
        }

        fn id(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.add("id", lexer.slice(start, end)?);
            Some(())
        }

        fn left_parenthesis(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.add("left_parenthesis", lexer.slice(start, end)?);
            Some(())
        }

        fn right_parenthesis(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.add("right_parenthesis", lexer.slice(start, end)?);
            Some(())
        }

        fn comma(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.add("comma", lexer.slice(start, end)?);
            Some(())
        }

        fn class(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.add("class", lexer.slice(start, end)?);
            Some(())
        }

        fn pseudo_function(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.add("pseudo_function", lexer.slice(start, end)?);
            Some(())
        }

        fn pseudo_class(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.add("pseudo_class", lexer.slice(start, end)?);
            Some(())
        }

        fn semicolon(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.add("semicolon", lexer.slice(start, end)?);
            Some(())
        }

        fn at_keyword(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.add("at_keyword", lexer.slice(start, end)?);
            Some(())
        }

        fn left_curly_bracket(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.add("left_curly", lexer.slice(start, end)?);
            Some(())
        }

        fn right_curly_bracket(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.add("right_curly", lexer.slice(start, end)?);
            Some(())
        }
    }

    /// Lexes `input` with a [`Snapshot`] visitor, checks that the whole input was
    /// consumed, and compares the recorded tokens with `snapshot`.
    fn assert_lexer_snapshot(input: &str, snapshot: &str) {
        let mut s = Snapshot::default();
        let mut l = Lexer::new(input);
        l.lex(&mut s);
        assert!(l.cur().is_none());
        similar_asserts::assert_eq!(s.snapshot(), snapshot);
    }

    // Empty input: consuming never yields a current character.
    #[test]
    fn lexer_state_1() {
        let mut l = Lexer::new("");
        assert_lexer_state(&l, None, None, None, Some(0), None, None);
        l.consume();
        assert_eq!(l.cur(), None);
        assert_lexer_state(&l, None, Some(0), None, None, None, None);
        l.consume();
        assert_eq!(l.cur(), None);
    }

    // Multi-byte characters: the expected positions 0, 1, 4, 8 and 11 are byte offsets,
    // since the four characters occupy 1, 3, 4 and 3 bytes in UTF-8.
    #[test]
    fn lexer_state_2() {
        let mut l = Lexer::new("0壹👂삼");
        assert_lexer_state(&l, None, None, Some('0'), Some(0), Some('壹'), Some(1));
        l.consume();
        assert_eq!(l.cur(), Some('0'));
        assert_lexer_state(
            &l,
            Some('0'),
            Some(0),
            Some('壹'),
            Some(1),
            Some('👂'),
            Some(4),
        );
        l.consume();
        assert_eq!(l.cur(), Some('壹'));
        assert_lexer_state(
            &l,
            Some('壹'),
            Some(1),
            Some('👂'),
            Some(4),
            Some('삼'),
            Some(8),
        );
        l.consume();
        assert_eq!(l.cur(), Some('👂'));
        assert_lexer_state(&l, Some('👂'), Some(4), Some('삼'), Some(8), None, Some(11));
        l.consume();
        assert_eq!(l.cur(), Some('삼'));
        assert_lexer_state(&l, Some('삼'), Some(8), None, Some(11), None, None);
        l.consume();
        assert_eq!(l.cur(), None);
        assert_lexer_state(&l, None, Some(11), None, None, None, None);
        l.consume();
        assert_eq!(l.cur(), None);
    }

    // A reversed lexer over empty input behaves like the forward one.
    #[test]
    fn lexer_state_3() {
        let l = Lexer::new("");
        let mut l = l.turn_back(0);
        assert_lexer_state(&l, None, None, None, Some(0), None, None);
        l.consume();
        assert_lexer_state(&l, None, Some(0), None, None, None, None);
    }

    // Unquoted URLs (including an escaped newline inside the URL) versus quoted URLs,
    // which are reported as a `url(` function followed by a string.
    #[test]
    fn parse_urls() {
        assert_lexer_snapshot(
            indoc! {r#"
                body {
                    background: url(
                        https://example\2f4a8f.com\
                /image.png
                    )
                }
                --element\ name.class\ name#_id {
                    background: url( "https://example.com/some url \"with\" 'spaces'.png" ) url('https://example.com/\'"quotes"\'.png');
                }
            "#},
            indoc! {r#"
                ident: body
                left_curly: {
                ident: background
                url: https://example\2f4a8f.com\
                /image.png
                right_curly: }
                ident: --element\ name
                class: .class\ name
                id: #_id
                left_curly: {
                ident: background
                function: url(
                string: "https://example.com/some url \"with\" 'spaces'.png"
                right_parenthesis: )
                function: url(
                string: 'https://example.com/\'"quotes"\'.png'
                right_parenthesis: )
                semicolon: ;
                right_curly: }
            "#},
        );
    }

    // `:name(` is a pseudo function, `:name` a pseudo class; a `:` followed by
    // whitespace (as in `color: red`) reports nothing.
    #[test]
    fn parse_pseudo_functions() {
        assert_lexer_snapshot(
            indoc! {r#"
                :local(.class#id, .class:not(*:hover)) { color: red; }
                :import(something from ":somewhere") {}
            "#},
            indoc! {r#"
                pseudo_function: :local(
                class: .class
                id: #id
                comma: ,
                class: .class
                pseudo_function: :not(
                pseudo_class: :hover
                right_parenthesis: )
                right_parenthesis: )
                left_curly: {
                ident: color
                ident: red
                semicolon: ;
                right_curly: }
                pseudo_function: :import(
                ident: something
                ident: from
                string: ":somewhere"
                right_parenthesis: )
                left_curly: {
                right_curly: }
            "#},
        );
    }

    // At-keywords; note that the number `100px` produces no visitor callback.
    #[test]
    fn parse_at_rules() {
        assert_lexer_snapshot(
            indoc! {r#"
                @media (max-size: 100px) {
                    @import "external.css";
                    body { color: red; }
                }
            "#},
            indoc! {r#"
                at_keyword: @media
                left_parenthesis: (
                ident: max-size
                right_parenthesis: )
                left_curly: {
                at_keyword: @import
                string: "external.css"
                semicolon: ;
                ident: body
                left_curly: {
                ident: color
                ident: red
                semicolon: ;
                right_curly: }
                right_curly: }
            "#},
        );
    }

    // Backslash-newline inside identifiers and URLs stays part of the token, while a
    // backslash-newline on its own (after `a:`) reports nothing.
    #[test]
    fn parse_escape() {
        assert_lexer_snapshot(
            indoc! {r#"
                body {
                    a\
                a: \
                    url(https://example\2f4a8f.com\
                /image.png)
                    b: url(#\
                hash)
                }
            "#},
            indoc! {r#"
                ident: body
                left_curly: {
                ident: a\
                a
                url: https://example\2f4a8f.com\
                /image.png
                ident: b
                url: #\
                hash
                right_curly: }
            "#},
        );
    }

    // `::after` is reported as a pseudo class starting at the second colon; the first
    // colon reports nothing.
    #[test]
    fn parse_pseudo_elements() {
        assert_lexer_snapshot(
            indoc! {r#"
                a::after {
                    content: ' (' attr(href) ')';
                }
            "#},
            indoc! {r#"
                ident: a
                pseudo_class: :after
                left_curly: {
                ident: content
                string: ' ('
                function: attr(
                ident: href
                right_parenthesis: )
                string: ')'
                semicolon: ;
                right_curly: }
            "#},
        );
    }

    // Without whitespace after the colon, and with `Snapshot::is_selector` always
    // answering `true`, `:url(` is reported as a pseudo function and the path is split
    // into class and ident tokens.
    #[test]
    fn parse_minimized_urls() {
        assert_lexer_snapshot(
            "body{background:url(./image.png)}",
            indoc! {r#"
                ident: body
                left_curly: {
                ident: background
                pseudo_function: :url(
                class: .
                ident: image
                class: .png
                right_parenthesis: )
                right_curly: }
            "#},
        );
    }
}