css_module_lexer/
lexer.rs

1use std::{iter::Rev, str::Chars};
2
3use crate::Range;
4
// Line terminators.
/// U+000A LINE FEED.
pub const C_LINE_FEED: char = '\n';
/// U+000D CARRIAGE RETURN.
pub const C_CARRIAGE_RETURN: char = '\r';
/// U+000C FORM FEED.
pub const C_FORM_FEED: char = '\x0C';

// Horizontal whitespace.
/// U+0009 CHARACTER TABULATION.
pub const C_TAB: char = '\t';
/// U+0020 SPACE.
pub const C_SPACE: char = ' ';

// Comment and escape punctuation.
/// `/`
pub const C_SOLIDUS: char = '/';
/// `\`
pub const C_REVERSE_SOLIDUS: char = '\\';
/// `*`
pub const C_ASTERISK: char = '*';

// Brackets.
/// `(`
pub const C_LEFT_PARENTHESIS: char = '(';
/// `)`
pub const C_RIGHT_PARENTHESIS: char = ')';
/// `{`
pub const C_LEFT_CURLY: char = '{';
/// `}`
pub const C_RIGHT_CURLY: char = '}';
/// `[`
pub const C_LEFT_SQUARE: char = '[';
/// `]`
pub const C_RIGHT_SQUARE: char = ']';

// String delimiters.
/// `"`
pub const C_QUOTATION_MARK: char = '"';
/// `'`
pub const C_APOSTROPHE: char = '\'';

// Other punctuation.
/// `.`
pub const C_FULL_STOP: char = '.';
/// `:`
pub const C_COLON: char = ':';
/// `;`
pub const C_SEMICOLON: char = ';';
/// `,`
pub const C_COMMA: char = ',';
/// `%`
pub const C_PERCENTAGE: char = '%';
/// `@`
pub const C_AT_SIGN: char = '@';

// Bounds used by the character-class predicates.
/// `_`
pub const C_LOW_LINE: char = '_';
/// `a`
pub const C_LOWER_A: char = 'a';
/// `e`
pub const C_LOWER_E: char = 'e';
/// `f`
pub const C_LOWER_F: char = 'f';
/// `z`
pub const C_LOWER_Z: char = 'z';
/// `A`
pub const C_UPPER_A: char = 'A';
/// `E`
pub const C_UPPER_E: char = 'E';
/// `F`
pub const C_UPPER_F: char = 'F';
/// `Z`
pub const C_UPPER_Z: char = 'Z';
/// `0`
pub const C_0: char = '0';
/// `9`
pub const C_9: char = '9';

// Signs.
/// `#`
pub const C_NUMBER_SIGN: char = '#';
/// `+`
pub const C_PLUS_SIGN: char = '+';
/// `-`
pub const C_HYPHEN_MINUS: char = '-';

// Angle brackets.
/// `<`
pub const C_LESS_THAN_SIGN: char = '<';
/// `>`
pub const C_GREATER_THAN_SIGN: char = '>';
51
/// Byte offset into the lexer's input string.
pub type Pos = u32;
53
54pub trait Visitor<'s> {
55    fn function(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
56    fn ident(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
57    fn url(
58        &mut self,
59        lexer: &mut Lexer<'s>,
60        start: Pos,
61        end: Pos,
62        content_start: Pos,
63        content_end: Pos,
64    ) -> Option<()>;
65    fn string(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
66    fn is_selector(&mut self, lexer: &mut Lexer<'s>) -> Option<bool>;
67    fn id(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
68    fn left_parenthesis(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
69    fn right_parenthesis(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
70    fn comma(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
71    fn class(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
72    fn pseudo_function(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
73    fn pseudo_class(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
74    fn semicolon(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
75    fn at_keyword(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
76    fn left_curly_bracket(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
77    fn right_curly_bracket(&mut self, lexer: &mut Lexer<'s>, start: Pos, end: Pos) -> Option<()>;
78}
79
80#[derive(Debug, Clone)]
81pub struct Lexer<'s, I: Iterator<Item = char> = Chars<'s>> {
82    value: &'s str,
83    iter: I,
84    cur_pos: Option<Pos>,
85    cur: Option<char>,
86    peek: Option<char>,
87    peek2: Option<char>,
88}
89
90impl<'s> Lexer<'s> {
91    pub fn new(value: &'s str) -> Self {
92        let mut iter = value.chars();
93        let peek = iter.next();
94        let peek2 = iter.next();
95        Self {
96            value,
97            iter,
98            cur_pos: None,
99            cur: None,
100            peek,
101            peek2,
102        }
103    }
104
105    pub fn turn_back(self, end: Pos) -> Lexer<'s, Rev<Chars<'s>>> {
106        let value = self.slice(0, end).unwrap();
107        let mut iter = value.chars().rev();
108        let peek = iter.next();
109        let peek2 = iter.next();
110        Lexer {
111            value,
112            iter,
113            cur_pos: None,
114            cur: None,
115            peek,
116            peek2,
117        }
118    }
119
120    pub fn slice(&self, start: Pos, end: Pos) -> Option<&'s str> {
121        Self::slice_range(self.value, &Range::new(start, end))
122    }
123
124    pub fn slice_range<'a>(input: &'a str, range: &Range) -> Option<&'a str> {
125        input.get(range.start as usize..range.end as usize)
126    }
127}
128
129impl<'s, I: Iterator<Item = char>> Lexer<'s, I> {
130    pub fn consume(&mut self) {
131        self.cur_pos = self.peek_pos();
132        self.cur = self.peek;
133        self.peek = self.peek2;
134        self.peek2 = self.iter.next();
135    }
136
137    pub fn cur_pos(&self) -> Option<Pos> {
138        self.cur_pos
139    }
140
141    pub fn cur(&self) -> Option<char> {
142        self.cur
143    }
144
145    pub fn peek_pos(&self) -> Option<Pos> {
146        if let Some(pos) = self.cur_pos() {
147            self.cur().map(|c| pos + c.len_utf8() as u32)
148        } else {
149            Some(0)
150        }
151    }
152
153    pub fn peek(&self) -> Option<char> {
154        self.peek
155    }
156
157    pub fn peek2_pos(&self) -> Option<Pos> {
158        self.peek_pos()
159            .and_then(|pos| self.peek().map(|c| pos + c.len_utf8() as u32))
160    }
161
162    pub fn peek2(&self) -> Option<char> {
163        self.peek2
164    }
165}
166
167impl<'s> Lexer<'s> {
168    pub fn lex<T: Visitor<'s>>(&mut self, visitor: &mut T) {
169        self.lex_impl(visitor);
170    }
171
172    fn lex_impl<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
173        self.consume();
174        while self.cur().is_some() {
175            self.consume_comments()?;
176            // https://drafts.csswg.org/css-syntax/#consume-token
177            match self.cur()? {
178                c if is_white_space(c) => self.consume_space()?,
179                C_QUOTATION_MARK => self.consume_string(visitor, C_QUOTATION_MARK)?,
180                C_NUMBER_SIGN => self.consume_number_sign(visitor)?,
181                C_APOSTROPHE => self.consume_string(visitor, C_APOSTROPHE)?,
182                C_LEFT_PARENTHESIS => self.consume_left_parenthesis(visitor)?,
183                C_RIGHT_PARENTHESIS => self.consume_right_parenthesis(visitor)?,
184                C_PLUS_SIGN => self.consume_plus_sign()?,
185                C_COMMA => self.consume_comma(visitor)?,
186                C_HYPHEN_MINUS => self.consume_minus(visitor)?,
187                C_FULL_STOP => self.consume_full_stop(visitor)?,
188                C_COLON => self.consume_potential_pseudo(visitor)?,
189                C_SEMICOLON => self.consume_semicolon(visitor)?,
190                C_LESS_THAN_SIGN => self.consume_less_than_sign()?,
191                C_AT_SIGN => self.consume_at_sign(visitor)?,
192                C_LEFT_SQUARE => self.consume_delim(),
193                C_REVERSE_SOLIDUS => self.consume_reverse_solidus(visitor)?,
194                C_RIGHT_SQUARE => self.consume_delim(),
195                C_LEFT_CURLY => self.consume_left_curly(visitor)?,
196                C_RIGHT_CURLY => self.consume_right_curly(visitor)?,
197                c if is_digit(c) => self.consume_numeric_token()?,
198                c if is_ident_start(c) => self.consume_ident_like(visitor)?,
199                _ => self.consume_delim(),
200            }
201        }
202        Some(())
203    }
204
205    pub fn consume_delim(&mut self) {
206        self.consume();
207    }
208
209    pub fn consume_numeric_token(&mut self) -> Option<()> {
210        self.consume_number()?;
211        let c = self.cur()?;
212        if start_ident_sequence(c, self.peek()?, self.peek2()?) {
213            return self.consume_ident_sequence();
214        }
215        if c == C_PERCENTAGE {
216            self.consume();
217        }
218        Some(())
219    }
220
221    pub fn consume_number(&mut self) -> Option<()> {
222        self.consume();
223        while is_digit(self.cur()?) {
224            self.consume();
225        }
226        if self.cur()? == C_FULL_STOP && is_digit(self.peek()?) {
227            self.consume();
228            self.consume();
229            while is_digit(self.cur()?) {
230                self.consume();
231            }
232        }
233        let c = self.cur()?;
234        if c == C_LOWER_E || c == C_UPPER_E {
235            let c = self.peek()?;
236            if is_digit(c) {
237                self.consume();
238            } else if c == C_HYPHEN_MINUS || c == C_PLUS_SIGN {
239                let c = self.peek2()?;
240                if is_digit(c) {
241                    self.consume();
242                    self.consume();
243                } else {
244                    return Some(());
245                }
246            } else {
247                return Some(());
248            }
249        } else {
250            return Some(());
251        }
252        self.consume();
253        while is_digit(self.cur()?) {
254            self.consume();
255        }
256        Some(())
257    }
258
259    pub fn consume_ident_sequence(&mut self) -> Option<()> {
260        loop {
261            let c = self.cur()?;
262            if maybe_valid_escape(c) {
263                self.consume();
264                self.consume_escaped()?;
265            } else if is_ident(c) {
266                self.consume();
267            } else {
268                return Some(());
269            }
270        }
271    }
272
273    pub fn consume_escaped(&mut self) -> Option<()> {
274        if is_hex_digit(self.cur()?) {
275            for _ in 1..5 {
276                self.consume();
277                if !is_hex_digit(self.cur()?) {
278                    break;
279                }
280            }
281            if is_white_space(self.cur()?) {
282                self.consume();
283            }
284        } else {
285            self.consume();
286        }
287        Some(())
288    }
289
290    pub fn consume_ident_like<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
291        let start = self.cur_pos()?;
292        self.consume_ident_sequence()?;
293        let peek_pos = self.peek_pos()?;
294        if self.cur_pos()? == start + 3 && self.slice(start, peek_pos)?.eq_ignore_ascii_case("url(")
295        {
296            self.consume();
297            while is_white_space(self.cur()?) {
298                self.consume();
299            }
300            let c = self.cur()?;
301            if c == C_QUOTATION_MARK || c == C_APOSTROPHE {
302                visitor.function(self, start, peek_pos)
303            } else {
304                self.consume_url(visitor, start)
305            }
306        } else if self.cur()? == C_LEFT_PARENTHESIS {
307            self.consume();
308            visitor.function(self, start, self.cur_pos()?)
309        } else {
310            visitor.ident(self, start, self.cur_pos()?)
311        }
312    }
313
314    pub fn consume_url<T: Visitor<'s>>(
315        self: &mut Lexer<'s>,
316        visitor: &mut T,
317        start: Pos,
318    ) -> Option<()> {
319        let content_start = self.cur_pos()?;
320        loop {
321            let c = self.cur()?;
322            if maybe_valid_escape(c) {
323                self.consume();
324                self.consume_escaped()?;
325            } else if is_white_space(c) {
326                let content_end = self.cur_pos()?;
327                self.consume();
328                while is_white_space(self.cur()?) {
329                    self.consume();
330                }
331                if self.cur()? != C_RIGHT_PARENTHESIS {
332                    return Some(());
333                }
334                self.consume();
335                return visitor.url(self, start, self.cur_pos()?, content_start, content_end);
336            } else if c == C_RIGHT_PARENTHESIS {
337                let content_end = self.cur_pos()?;
338                self.consume();
339                return visitor.url(self, start, self.cur_pos()?, content_start, content_end);
340            } else if c == C_LEFT_PARENTHESIS {
341                return Some(());
342            } else {
343                self.consume();
344            }
345        }
346    }
347
348    pub fn consume_string<T: Visitor<'s>>(&mut self, visitor: &mut T, end: char) -> Option<()> {
349        let start = self.cur_pos()?;
350        self.consume();
351        loop {
352            let c = self.cur()?;
353            if c == end {
354                self.consume();
355                break;
356            } else if is_new_line(c) {
357                break;
358            } else if c == C_REVERSE_SOLIDUS {
359                self.consume();
360                let c2 = self.cur()?;
361                if is_new_line(c2) {
362                    self.consume();
363                } else if are_valid_escape(c, c2) {
364                    self.consume_escaped()?;
365                }
366            } else {
367                self.consume();
368            }
369        }
370        visitor.string(self, start, self.cur_pos()?)
371    }
372
373    pub fn consume_number_sign<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
374        let c2 = self.peek()?;
375        let start = self.cur_pos()?;
376        if is_ident(c2) || are_valid_escape(c2, self.peek2()?) {
377            self.consume();
378            if !visitor.is_selector(self)? {
379                return Some(());
380            }
381            if !start_ident_sequence(self.cur()?, self.peek()?, self.peek2()?) {
382                return visitor.id(self, start, self.cur_pos()?);
383            }
384            self.consume_ident_sequence()?;
385            visitor.id(self, start, self.cur_pos()?)
386        } else {
387            self.consume_delim();
388            visitor.id(self, start, self.cur_pos()?)
389        }
390    }
391
392    pub fn consume_left_parenthesis<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
393        self.consume();
394        let end = self.cur_pos()?;
395        visitor.left_parenthesis(self, end - 1, end)
396    }
397
398    pub fn consume_right_parenthesis<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
399        self.consume();
400        let end = self.cur_pos()?;
401        visitor.right_parenthesis(self, end - 1, end)
402    }
403
404    pub fn consume_plus_sign(&mut self) -> Option<()> {
405        if start_number(self.cur()?, self.peek()?, self.peek2()?) {
406            self.consume_numeric_token()?;
407        } else {
408            self.consume_delim();
409        }
410        Some(())
411    }
412
413    pub fn consume_comma<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
414        self.consume();
415        let end = self.cur_pos()?;
416        visitor.comma(self, end - 1, end)
417    }
418
419    pub fn consume_minus<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
420        let c = self.cur()?;
421        let c2 = self.peek()?;
422        let c3 = self.peek2()?;
423        if start_number(c, c2, c3) {
424            self.consume_numeric_token()?;
425        } else if c2 == C_HYPHEN_MINUS && c3 == C_GREATER_THAN_SIGN {
426            self.consume();
427            self.consume();
428        } else if start_ident_sequence(c, c2, c3) {
429            self.consume_ident_like(visitor)?;
430        } else {
431            self.consume_delim();
432        }
433        Some(())
434    }
435
436    pub fn consume_full_stop<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
437        let c = self.cur()?;
438        let c2 = self.peek()?;
439        let c3 = self.peek2()?;
440        if start_number(c, c2, c3) {
441            return self.consume_numeric_token();
442        }
443        let start = self.cur_pos()?;
444        self.consume();
445        if !visitor.is_selector(self)? {
446            return Some(());
447        }
448        if !start_ident_sequence(c2, c3, self.peek2()?) {
449            return visitor.class(self, start, self.cur_pos()?);
450        }
451        self.consume_ident_sequence()?;
452        visitor.class(self, start, self.cur_pos()?)
453    }
454
455    pub fn consume_potential_pseudo<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
456        let start = self.cur_pos()?;
457        self.consume();
458        if !visitor.is_selector(self)?
459            || !start_ident_sequence(self.cur()?, self.peek()?, self.peek2()?)
460        {
461            return Some(());
462        }
463        self.consume_ident_sequence()?;
464        if self.cur()? == C_LEFT_PARENTHESIS {
465            self.consume();
466            visitor.pseudo_function(self, start, self.cur_pos()?)
467        } else {
468            visitor.pseudo_class(self, start, self.cur_pos()?)
469        }
470    }
471
472    pub fn consume_semicolon<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
473        self.consume();
474        let end = self.cur_pos()?;
475        visitor.semicolon(self, end - 1, end)
476    }
477
478    pub fn consume_less_than_sign(&mut self) -> Option<()> {
479        self.consume();
480        if self.cur()? == '!' && self.peek()? == '-' && self.peek2()? == '-' {
481            self.consume();
482            self.consume();
483            self.consume();
484        }
485        Some(())
486    }
487
488    pub fn consume_at_sign<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
489        let start = self.cur_pos()?;
490        self.consume();
491        if start_ident_sequence(self.cur()?, self.peek()?, self.peek2()?) {
492            self.consume_ident_sequence()?;
493            return visitor.at_keyword(self, start, self.cur_pos()?);
494        }
495        Some(())
496    }
497
498    pub fn consume_reverse_solidus<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
499        if are_valid_escape(self.cur()?, self.peek()?) {
500            self.consume_ident_like(visitor)?;
501        } else {
502            self.consume_delim();
503        }
504        Some(())
505    }
506
507    pub fn consume_left_curly<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
508        self.consume();
509        let end = self.cur_pos()?;
510        visitor.left_curly_bracket(self, end - 1, end)
511    }
512
513    pub fn consume_right_curly<T: Visitor<'s>>(&mut self, visitor: &mut T) -> Option<()> {
514        self.consume();
515        let end = self.cur_pos()?;
516        visitor.right_curly_bracket(self, end - 1, end)
517    }
518}
519
520impl<'s, I: Iterator<Item = char>> Lexer<'s, I> {
521    pub fn consume_comments(&mut self) -> Option<()> {
522        if self.cur()? == C_SOLIDUS && self.peek()? == C_ASTERISK {
523            self.consume();
524            loop {
525                self.consume();
526                let c = self.cur()?;
527                if c == C_ASTERISK && self.peek()? == C_SOLIDUS {
528                    self.consume();
529                    self.consume();
530                    break;
531                }
532            }
533        }
534        Some(())
535    }
536
537    pub fn consume_space(&mut self) -> Option<()> {
538        self.consume();
539        while is_white_space(self.cur()?) {
540            self.consume();
541        }
542        Some(())
543    }
544
545    pub fn consume_white_space_and_comments(&mut self) -> Option<()> {
546        loop {
547            self.consume_comments()?;
548            if is_white_space(self.cur()?) {
549                self.consume_space()?;
550            } else {
551                break;
552            }
553        }
554        Some(())
555    }
556}
557
558pub fn is_new_line(c: char) -> bool {
559    c == C_LINE_FEED || c == C_CARRIAGE_RETURN || c == C_FORM_FEED
560}
561
562pub fn is_space(c: char) -> bool {
563    c == C_TAB || c == C_SPACE
564}
565
566pub fn is_white_space(c: char) -> bool {
567    is_new_line(c) || is_space(c)
568}
569
570pub fn is_digit(c: char) -> bool {
571    c >= C_0 && c <= C_9
572}
573
574pub fn is_hex_digit(c: char) -> bool {
575    is_digit(c) || (c >= C_UPPER_A && c <= C_UPPER_F) || (c >= C_LOWER_A && c <= C_LOWER_F)
576}
577
/// Returns `true` when `c` may start an identifier: an ASCII letter, `_`, or
/// any non-ASCII character.
///
/// CSS Syntax Level 3 defines a non-ASCII code point as one with a value
/// equal to or greater than U+0080, so U+0080 itself is accepted.
pub fn is_ident_start(c: char) -> bool {
    c == '_' || c.is_ascii_alphabetic() || c >= '\u{80}'
}
584
585pub fn is_ident(c: char) -> bool {
586    is_ident_start(c) || is_digit(c) || c == C_HYPHEN_MINUS
587}
588
589pub fn start_ident_sequence(c1: char, c2: char, c3: char) -> bool {
590    if c1 == C_HYPHEN_MINUS {
591        is_ident_start(c2) || c2 == C_HYPHEN_MINUS || are_valid_escape(c2, c3)
592    } else {
593        is_ident_start(c1) || are_valid_escape(c1, c2)
594    }
595}
596
/// Returns `true` when `c` is a backslash, i.e. when an escape may start here.
pub fn maybe_valid_escape(c: char) -> bool {
    matches!(c, C_REVERSE_SOLIDUS)
}
600
601pub fn are_valid_escape(c1: char, c2: char) -> bool {
602    c1 == C_REVERSE_SOLIDUS && !is_new_line(c2)
603}
604
605pub fn start_number(c1: char, c2: char, c3: char) -> bool {
606    if c1 == C_PLUS_SIGN || c1 == C_HYPHEN_MINUS {
607        is_digit(c2) || (c2 == C_FULL_STOP && is_digit(c3))
608    } else {
609        is_digit(c1) || (c1 == C_FULL_STOP && is_digit(c2))
610    }
611}
612
#[cfg(test)]
mod tests {
    use super::*;
    use indoc::indoc;

    /// Checks the current character and both lookahead slots, each together
    /// with its position.
    fn assert_lexer_state<I: Iterator<Item = char>>(
        lexer: &Lexer<'_, I>,
        cur: Option<char>,
        cur_pos: Option<Pos>,
        peek: Option<char>,
        peek_pos: Option<Pos>,
        peek2: Option<char>,
        peek2_pos: Option<Pos>,
    ) {
        assert_eq!((lexer.cur(), lexer.cur_pos()), (cur, cur_pos));
        assert_eq!((lexer.peek(), lexer.peek_pos()), (peek, peek_pos));
        assert_eq!((lexer.peek2(), lexer.peek2_pos()), (peek2, peek2_pos));
    }

    /// Visitor that records every callback as a `(kind, text)` pair.
    #[derive(Default)]
    struct Snapshot {
        results: Vec<(String, String)>,
    }

    impl Snapshot {
        /// Appends one `(key, value)` entry.
        pub fn add(&mut self, key: &str, value: &str) {
            self.results.push((key.to_owned(), value.to_owned()));
        }

        /// Renders the recorded entries as `key: value` lines.
        pub fn snapshot(&self) -> String {
            let mut rendered = String::new();
            for (key, value) in &self.results {
                rendered.push_str(key);
                rendered.push_str(": ");
                rendered.push_str(value);
                rendered.push('\n');
            }
            rendered
        }

        /// Records the input text between `start` and `end` under `key`.
        fn record(&mut self, key: &str, lexer: &Lexer<'_>, start: Pos, end: Pos) -> Option<()> {
            let text = lexer.slice(start, end)?;
            self.add(key, text);
            Some(())
        }
    }

    impl Visitor<'_> for Snapshot {
        fn function(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.record("function", lexer, start, end)
        }

        fn ident(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.record("ident", lexer, start, end)
        }

        fn url(
            &mut self,
            lexer: &mut Lexer,
            _: Pos,
            _: Pos,
            content_start: Pos,
            content_end: Pos,
        ) -> Option<()> {
            self.record("url", lexer, content_start, content_end)
        }

        fn string(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.record("string", lexer, start, end)
        }

        fn is_selector(&mut self, _: &mut Lexer) -> Option<bool> {
            Some(true)
        }

        fn id(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.record("id", lexer, start, end)
        }

        fn left_parenthesis(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.record("left_parenthesis", lexer, start, end)
        }

        fn right_parenthesis(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.record("right_parenthesis", lexer, start, end)
        }

        fn comma(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.record("comma", lexer, start, end)
        }

        fn class(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.record("class", lexer, start, end)
        }

        fn pseudo_function(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.record("pseudo_function", lexer, start, end)
        }

        fn pseudo_class(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.record("pseudo_class", lexer, start, end)
        }

        fn semicolon(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.record("semicolon", lexer, start, end)
        }

        fn at_keyword(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.record("at_keyword", lexer, start, end)
        }

        fn left_curly_bracket(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.record("left_curly", lexer, start, end)
        }

        fn right_curly_bracket(&mut self, lexer: &mut Lexer, start: Pos, end: Pos) -> Option<()> {
            self.record("right_curly", lexer, start, end)
        }
    }

    /// Lexes `input` and compares the recorded callbacks with `snapshot`.
    fn assert_lexer_snapshot(input: &str, snapshot: &str) {
        let mut recorder = Snapshot::default();
        let mut lexer = Lexer::new(input);
        lexer.lex(&mut recorder);
        assert!(lexer.cur().is_none());
        similar_asserts::assert_eq!(recorder.snapshot(), snapshot);
    }

    #[test]
    fn lexer_state_1() {
        let mut lexer = Lexer::new("");
        assert_lexer_state(&lexer, None, None, None, Some(0), None, None);
        lexer.consume();
        assert_eq!(lexer.cur(), None);
        assert_lexer_state(&lexer, None, Some(0), None, None, None, None);
        lexer.consume();
        assert_eq!(lexer.cur(), None);
    }

    #[test]
    fn lexer_state_2() {
        let mut lexer = Lexer::new("0壹👂삼");
        assert_lexer_state(&lexer, None, None, Some('0'), Some(0), Some('壹'), Some(1));
        lexer.consume();
        assert_eq!(lexer.cur(), Some('0'));
        assert_lexer_state(
            &lexer,
            Some('0'),
            Some(0),
            Some('壹'),
            Some(1),
            Some('👂'),
            Some(4),
        );
        lexer.consume();
        assert_eq!(lexer.cur(), Some('壹'));
        assert_lexer_state(
            &lexer,
            Some('壹'),
            Some(1),
            Some('👂'),
            Some(4),
            Some('삼'),
            Some(8),
        );
        lexer.consume();
        assert_eq!(lexer.cur(), Some('👂'));
        assert_lexer_state(
            &lexer,
            Some('👂'),
            Some(4),
            Some('삼'),
            Some(8),
            None,
            Some(11),
        );
        lexer.consume();
        assert_eq!(lexer.cur(), Some('삼'));
        assert_lexer_state(&lexer, Some('삼'), Some(8), None, Some(11), None, None);
        lexer.consume();
        assert_eq!(lexer.cur(), None);
        assert_lexer_state(&lexer, None, Some(11), None, None, None, None);
        lexer.consume();
        assert_eq!(lexer.cur(), None);
    }

    #[test]
    fn lexer_state_3() {
        let forward = Lexer::new("");
        let mut backward = forward.turn_back(0);
        assert_lexer_state(&backward, None, None, None, Some(0), None, None);
        backward.consume();
        assert_lexer_state(&backward, None, Some(0), None, None, None, None);
    }

    #[test]
    fn parse_urls() {
        assert_lexer_snapshot(
            indoc! {r#"
            body {
                background: url(
                    https://example\2f4a8f.com\
            /image.png
                )
            }
            --element\ name.class\ name#_id {
                background: url(  "https://example.com/some url \"with\" 'spaces'.png"   )  url('https://example.com/\'"quotes"\'.png');
            }
        "#},
            indoc! {r#"
            ident: body
            left_curly: {
            ident: background
            url: https://example\2f4a8f.com\
            /image.png
            right_curly: }
            ident: --element\ name
            class: .class\ name
            id: #_id
            left_curly: {
            ident: background
            function: url(
            string: "https://example.com/some url \"with\" 'spaces'.png"
            right_parenthesis: )
            function: url(
            string: 'https://example.com/\'"quotes"\'.png'
            right_parenthesis: )
            semicolon: ;
            right_curly: }
        "#},
        );
    }

    #[test]
    fn parse_pseudo_functions() {
        assert_lexer_snapshot(
            indoc! {r#"
            :local(.class#id, .class:not(*:hover)) { color: red; }
            :import(something from ":somewhere") {}
        "#},
            indoc! {r#"
            pseudo_function: :local(
            class: .class
            id: #id
            comma: ,
            class: .class
            pseudo_function: :not(
            pseudo_class: :hover
            right_parenthesis: )
            right_parenthesis: )
            left_curly: {
            ident: color
            ident: red
            semicolon: ;
            right_curly: }
            pseudo_function: :import(
            ident: something
            ident: from
            string: ":somewhere"
            right_parenthesis: )
            left_curly: {
            right_curly: }
        "#},
        );
    }

    #[test]
    fn parse_at_rules() {
        assert_lexer_snapshot(
            indoc! {r#"
            @media (max-size: 100px) {
                @import "external.css";
                body { color: red; }
            }
        "#},
            indoc! {r#"
            at_keyword: @media
            left_parenthesis: (
            ident: max-size
            right_parenthesis: )
            left_curly: {
            at_keyword: @import
            string: "external.css"
            semicolon: ;
            ident: body
            left_curly: {
            ident: color
            ident: red
            semicolon: ;
            right_curly: }
            right_curly: }
        "#},
        );
    }

    #[test]
    fn parse_escape() {
        assert_lexer_snapshot(
            indoc! {r#"
                body {
                    a\
                a: \
                url(https://example\2f4a8f.com\
                /image.png)
                    b: url(#\
                hash)
                }
            "#},
            indoc! {r#"
                ident: body
                left_curly: {
                ident: a\
                a
                url: https://example\2f4a8f.com\
                /image.png
                ident: b
                url: #\
                hash
                right_curly: }
            "#},
        );
    }

    #[test]
    fn parse_pseudo_elements() {
        assert_lexer_snapshot(
            indoc! {r#"
                a::after {
                    content: ' (' attr(href) ')';
                }
            "#},
            indoc! {r#"
                ident: a
                pseudo_class: :after
                left_curly: {
                ident: content
                string: ' ('
                function: attr(
                ident: href
                right_parenthesis: )
                string: ')'
                semicolon: ;
                right_curly: }
            "#},
        );
    }

    #[test]
    fn parse_minimized_urls() {
        assert_lexer_snapshot(
            "body{background:url(./image.png)}",
            indoc! {r#"
                ident: body
                left_curly: {
                ident: background
                pseudo_function: :url(
                class: .
                ident: image
                class: .png
                right_parenthesis: )
                right_curly: }
            "#},
        );
    }
}