intuicio_parser/
regex.rs

1use crate::{
2    ParseResult, Parser, ParserExt, ParserHandle, ParserNoValue, ParserOutput, ParserRegistry,
3};
4
5pub mod shorthand {
6    use super::*;
7    use crate::shorthand::map;
8
9    pub fn regex(pattern: impl AsRef<str>) -> ParserHandle {
10        RegexParser::new(pattern).into_handle()
11    }
12
13    pub fn regex_capture(pattern: impl AsRef<str>, capture: impl ToString) -> ParserHandle {
14        RegexParser::new_capture(pattern, capture).into_handle()
15    }
16
17    pub fn any() -> ParserHandle {
18        regex(r".")
19    }
20
21    pub fn nl() -> ParserHandle {
22        regex(r"[\r\n]")
23    }
24
25    pub fn digit_hex() -> ParserHandle {
26        regex(r"[0-9a-fA-F]&")
27    }
28
29    pub fn digit() -> ParserHandle {
30        regex(r"\d")
31    }
32
33    pub fn number_int_pos() -> ParserHandle {
34        regex(r"\d+")
35    }
36
37    pub fn number_int() -> ParserHandle {
38        regex(r"-?\d+")
39    }
40
41    pub fn number_float() -> ParserHandle {
42        regex(r"-?\d+(\.\d+(e-?\d+)?)?")
43    }
44
45    pub fn alphanum() -> ParserHandle {
46        regex(r"\w")
47    }
48
49    pub fn alpha_low() -> ParserHandle {
50        regex(r"[a-z]")
51    }
52
53    pub fn alpha_up() -> ParserHandle {
54        regex(r"[A-Z]")
55    }
56
57    pub fn alpha() -> ParserHandle {
58        regex(r"[a-zA-Z]")
59    }
60
61    pub fn word() -> ParserHandle {
62        regex(r"\w+")
63    }
64
65    pub fn string(open: &str, close: &str) -> ParserHandle {
66        let open = open.escape_unicode().to_string();
67        let close = close.escape_unicode().to_string();
68        let pattern = format!("{0}(?<content>[^{1}]*){1}", open, close);
69        map(regex_capture(pattern, "content"), move |value: String| {
70            snailquote::unescape(&value).unwrap()
71        })
72    }
73
74    pub fn id_start() -> ParserHandle {
75        regex(r"[a-zA-Z_]")
76    }
77
78    pub fn id_continue() -> ParserHandle {
79        regex(r"[0-9a-zA-Z_]*")
80    }
81
82    pub fn id() -> ParserHandle {
83        regex(r"[a-zA-Z_][0-9a-zA-Z_]*")
84    }
85
86    pub fn ws() -> ParserHandle {
87        WhiteSpaceParser::default().into_handle()
88    }
89
90    pub fn ows() -> ParserHandle {
91        OptionalWhiteSpaceParser::default().into_handle()
92    }
93}
94
95#[derive(Clone)]
96pub struct RegexParser {
97    regex: regex::Regex,
98    capture: Option<String>,
99}
100
101impl RegexParser {
102    pub fn new(pattern: impl AsRef<str>) -> Self {
103        let pattern = format!(r"^{}", pattern.as_ref());
104        Self {
105            regex: regex::Regex::new(&pattern).expect("Expected valid regex"),
106            capture: None,
107        }
108    }
109
110    pub fn new_capture(pattern: impl AsRef<str>, capture: impl ToString) -> Self {
111        let pattern = format!(r"^{}", pattern.as_ref());
112        Self {
113            regex: regex::Regex::new(&pattern).expect("Expected valid regex"),
114            capture: Some(capture.to_string()),
115        }
116    }
117}
118
119impl Parser for RegexParser {
120    fn parse<'a>(&self, _: &ParserRegistry, input: &'a str) -> ParseResult<'a> {
121        if let Some(capture) = self.capture.as_deref() {
122            if let Some(cap) = self.regex.captures(input) {
123                Ok((
124                    &input[cap.get(0).unwrap().end()..],
125                    ParserOutput::new(
126                        cap.name(capture)
127                            .map(|mat| mat.as_str())
128                            .unwrap_or("")
129                            .to_owned(),
130                    )
131                    .ok()
132                    .unwrap(),
133                ))
134            } else {
135                Err(format!(
136                    "Expected regex match '{}' with capture: '{}'",
137                    self.regex, capture
138                )
139                .into())
140            }
141        } else if let Some(mat) = self.regex.find(input) {
142            Ok((
143                &input[mat.end()..],
144                ParserOutput::new(mat.as_str().to_owned()).ok().unwrap(),
145            ))
146        } else {
147            Err(format!("Expected regex match '{}'", self.regex).into())
148        }
149    }
150}
151
152#[derive(Clone)]
153pub struct WhiteSpaceParser(RegexParser);
154
155impl Default for WhiteSpaceParser {
156    fn default() -> Self {
157        Self(RegexParser::new(r"\s+"))
158    }
159}
160
161impl Parser for WhiteSpaceParser {
162    fn parse<'a>(&self, registry: &ParserRegistry, input: &'a str) -> ParseResult<'a> {
163        match self.0.parse(registry, input) {
164            Ok((rest, _)) => Ok((rest, ParserOutput::new(ParserNoValue).ok().unwrap())),
165            Err(error) => Err(error),
166        }
167    }
168}
169
170#[derive(Clone)]
171pub struct OptionalWhiteSpaceParser(RegexParser);
172
173impl Default for OptionalWhiteSpaceParser {
174    fn default() -> Self {
175        Self(RegexParser::new(r"\s*"))
176    }
177}
178
179impl Parser for OptionalWhiteSpaceParser {
180    fn parse<'a>(&self, registry: &ParserRegistry, input: &'a str) -> ParseResult<'a> {
181        match self.0.parse(registry, input) {
182            Ok((rest, _)) => Ok((rest, ParserOutput::new(ParserNoValue).ok().unwrap())),
183            Err(error) => Err(error),
184        }
185    }
186}
187
#[cfg(test)]
mod tests {
    use crate::{
        regex::{OptionalWhiteSpaceParser, RegexParser, WhiteSpaceParser},
        shorthand::{ows, regex, regex_capture, string, ws},
        ParserRegistry,
    };

    /// Compiles only when `T` is both `Send` and `Sync`.
    fn is_async<T>()
    where
        T: Send + Sync,
    {
    }

    #[test]
    fn test_regex() {
        // Compile-time thread-safety checks for every parser type in this file.
        is_async::<RegexParser>();
        is_async::<WhiteSpaceParser>();
        is_async::<OptionalWhiteSpaceParser>();

        let registry = ParserRegistry::default();

        // Named capture: surrounding whitespace is consumed but not produced.
        let captured = regex_capture(r"\s+(?<name>\w+)\s+", "name");
        let (remaining, output) = captured.parse(&registry, " foo ").unwrap();
        assert_eq!(remaining, "");
        assert_eq!(output.read::<String>().unwrap().as_str(), "foo");

        // Delimited string with identical open and close delimiters.
        let backticked = string("`", "`");
        let (remaining, output) = backticked.parse(&registry, "`Hello World!`").unwrap();
        assert_eq!(remaining, "");
        assert_eq!(output.read::<String>().unwrap().as_str(), "Hello World!");

        // Delimited string whose delimiters are regex metacharacters.
        let parenthesized = string("(", ")");
        let (remaining, output) = parenthesized.parse(&registry, "(Hello World!)").unwrap();
        assert_eq!(remaining, "");
        assert_eq!(output.read::<String>().unwrap().as_str(), "Hello World!");

        // Plain regex: only the leading word is consumed.
        let leading_word = regex(r"\w+");
        let (remaining, _) = leading_word.parse(&registry, "foo bar").unwrap();
        assert_eq!(remaining, " bar");

        // Mandatory whitespace: succeeds on whitespace, fails on anything else.
        let required_space = ws();
        let (remaining, _) = required_space.parse(&registry, "   \t  \n").unwrap();
        assert_eq!(remaining, "");
        let failure = required_space.parse(&registry, "a").err().unwrap();
        assert_eq!(failure.to_string(), "Expected regex match '^\\s+'");

        // Optional whitespace: succeeds with or without whitespace present.
        let optional_space = ows();
        let (remaining, _) = optional_space.parse(&registry, "   \t  \n").unwrap();
        assert_eq!(remaining, "");
        let (remaining, _) = optional_space.parse(&registry, "foo").unwrap();
        assert_eq!(remaining, "foo");
    }
}
235}