surrealdb_core/sql/
regex.rs1use crate::cnf::{REGEX_CACHE_SIZE, REGEX_SIZE_LIMIT};
2use quick_cache::sync::{Cache, GuardResult};
3use regex::RegexBuilder;
4use revision::revisioned;
5use serde::{
6 de::{self, Visitor},
7 Deserialize, Deserializer, Serialize, Serializer,
8};
9use std::cmp::Ordering;
10use std::fmt::Debug;
11use std::fmt::{self, Display, Formatter};
12use std::hash::{Hash, Hasher};
13use std::str;
14use std::str::FromStr;
15use std::sync::LazyLock;
16
17pub(crate) const TOKEN: &str = "$surrealdb::private::sql::Regex";
18
19#[revisioned(revision = 1)]
20#[derive(Clone)]
21#[non_exhaustive]
22pub struct Regex(pub regex::Regex);
23
24impl Regex {
25 pub fn regex(&self) -> ®ex::Regex {
27 &self.0
28 }
29}
30
31pub(crate) fn regex_new(str: &str) -> Result<regex::Regex, regex::Error> {
32 static REGEX_CACHE: LazyLock<Cache<String, regex::Regex>> =
33 LazyLock::new(|| Cache::new(REGEX_CACHE_SIZE.max(10)));
34 match REGEX_CACHE.get_value_or_guard(str, None) {
35 GuardResult::Value(v) => Ok(v),
36 GuardResult::Guard(g) => {
37 let re = RegexBuilder::new(str).size_limit(*REGEX_SIZE_LIMIT).build()?;
38 g.insert(re.clone()).ok();
39 Ok(re)
40 }
41 GuardResult::Timeout => {
42 warn!("Regex cache timeout");
43 RegexBuilder::new(str).size_limit(*REGEX_SIZE_LIMIT).build()
44 }
45 }
46}
47
48impl FromStr for Regex {
49 type Err = <regex::Regex as FromStr>::Err;
50
51 fn from_str(s: &str) -> Result<Self, Self::Err> {
52 if s.contains('\0') {
53 Err(regex::Error::Syntax("regex contained NUL byte".to_owned()))
54 } else {
55 regex_new(&s.replace("\\/", "/")).map(Self)
56 }
57 }
58}
59
60impl PartialEq for Regex {
61 fn eq(&self, other: &Self) -> bool {
62 let str_left = self.0.as_str();
63 let str_right = other.0.as_str();
64 str_left == str_right
65 }
66}
67
68impl Eq for Regex {}
69
70impl Ord for Regex {
71 fn cmp(&self, other: &Self) -> Ordering {
72 self.0.as_str().cmp(other.0.as_str())
73 }
74}
75
76impl PartialOrd for Regex {
77 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
78 Some(self.cmp(other))
79 }
80}
81
82impl Hash for Regex {
83 fn hash<H: Hasher>(&self, state: &mut H) {
84 self.0.as_str().hash(state);
85 }
86}
87
88impl Debug for Regex {
89 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
90 Display::fmt(self, f)
91 }
92}
93
94impl Display for Regex {
95 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
96 let t = self.0.to_string().replace('/', "\\/");
97 write!(f, "/{}/", &t)
98 }
99}
100
101impl Serialize for Regex {
102 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
103 where
104 S: Serializer,
105 {
106 serializer.serialize_newtype_struct(TOKEN, self.0.as_str())
107 }
108}
109
110impl<'de> Deserialize<'de> for Regex {
111 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
112 where
113 D: Deserializer<'de>,
114 {
115 struct RegexNewtypeVisitor;
116
117 impl<'de> Visitor<'de> for RegexNewtypeVisitor {
118 type Value = Regex;
119
120 fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
121 formatter.write_str("a regex newtype")
122 }
123
124 fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
125 where
126 D: Deserializer<'de>,
127 {
128 struct RegexVisitor;
129
130 impl Visitor<'_> for RegexVisitor {
131 type Value = Regex;
132
133 fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
134 formatter.write_str("a regex str")
135 }
136
137 fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
138 where
139 E: de::Error,
140 {
141 Regex::from_str(value).map_err(|_| de::Error::custom("invalid regex"))
142 }
143 }
144
145 deserializer.deserialize_str(RegexVisitor)
146 }
147 }
148
149 deserializer.deserialize_newtype_struct(TOKEN, RegexNewtypeVisitor)
150 }
151}
152
#[cfg(test)]
mod tests {
    use super::regex_new;

    /// A pattern that expands far beyond the size limit must be rejected at
    /// compile time rather than being built.
    #[test]
    fn regex_compile_limit() {
        let Err(e) = regex_new("^(a|b|c){1000000}") else {
            panic!("regex should have failed");
        };
        // 10_485_760 bytes = 10 MiB; presumably the default `REGEX_SIZE_LIMIT`
        // (verify against `crate::cnf`).
        assert!(matches!(e, regex::Error::CompiledTooBig(10_485_760)), "{e}");
    }
}