surrealdb_core/sql/
regex.rs

1use crate::cnf::{REGEX_CACHE_SIZE, REGEX_SIZE_LIMIT};
2use quick_cache::sync::{Cache, GuardResult};
3use regex::RegexBuilder;
4use revision::revisioned;
5use serde::{
6	de::{self, Visitor},
7	Deserialize, Deserializer, Serialize, Serializer,
8};
9use std::cmp::Ordering;
10use std::fmt::Debug;
11use std::fmt::{self, Display, Formatter};
12use std::hash::{Hash, Hasher};
13use std::str;
14use std::str::FromStr;
15use std::sync::LazyLock;
16
/// Newtype-struct name handed to serde when a [`Regex`] is serialized or
/// deserialized (see the `Serialize` and `Deserialize` impls in this file).
pub(crate) const TOKEN: &str = "$surrealdb::private::sql::Regex";
18
/// A compiled regular expression: a newtype around [`regex::Regex`].
///
/// The trait impls in this file compare, order and hash values by their
/// pattern source string (`regex::Regex::as_str`), and `Display` renders the
/// pattern between `/` delimiters.
#[revisioned(revision = 1)]
#[derive(Clone)]
#[non_exhaustive]
pub struct Regex(pub regex::Regex);
23
24impl Regex {
25	// Deref would expose `regex::Regex::as_str` which wouldn't have the '/' delimiters.
26	pub fn regex(&self) -> &regex::Regex {
27		&self.0
28	}
29}
30
31pub(crate) fn regex_new(str: &str) -> Result<regex::Regex, regex::Error> {
32	static REGEX_CACHE: LazyLock<Cache<String, regex::Regex>> =
33		LazyLock::new(|| Cache::new(REGEX_CACHE_SIZE.max(10)));
34	match REGEX_CACHE.get_value_or_guard(str, None) {
35		GuardResult::Value(v) => Ok(v),
36		GuardResult::Guard(g) => {
37			let re = RegexBuilder::new(str).size_limit(*REGEX_SIZE_LIMIT).build()?;
38			g.insert(re.clone()).ok();
39			Ok(re)
40		}
41		GuardResult::Timeout => {
42			warn!("Regex cache timeout");
43			RegexBuilder::new(str).size_limit(*REGEX_SIZE_LIMIT).build()
44		}
45	}
46}
47
48impl FromStr for Regex {
49	type Err = <regex::Regex as FromStr>::Err;
50
51	fn from_str(s: &str) -> Result<Self, Self::Err> {
52		if s.contains('\0') {
53			Err(regex::Error::Syntax("regex contained NUL byte".to_owned()))
54		} else {
55			regex_new(&s.replace("\\/", "/")).map(Self)
56		}
57	}
58}
59
60impl PartialEq for Regex {
61	fn eq(&self, other: &Self) -> bool {
62		let str_left = self.0.as_str();
63		let str_right = other.0.as_str();
64		str_left == str_right
65	}
66}
67
// Marker impl: the `PartialEq` impl compares pattern strings with `==`.
impl Eq for Regex {}
69
70impl Ord for Regex {
71	fn cmp(&self, other: &Self) -> Ordering {
72		self.0.as_str().cmp(other.0.as_str())
73	}
74}
75
76impl PartialOrd for Regex {
77	fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
78		Some(self.cmp(other))
79	}
80}
81
82impl Hash for Regex {
83	fn hash<H: Hasher>(&self, state: &mut H) {
84		self.0.as_str().hash(state);
85	}
86}
87
88impl Debug for Regex {
89	fn fmt(&self, f: &mut Formatter) -> fmt::Result {
90		Display::fmt(self, f)
91	}
92}
93
94impl Display for Regex {
95	fn fmt(&self, f: &mut Formatter) -> fmt::Result {
96		let t = self.0.to_string().replace('/', "\\/");
97		write!(f, "/{}/", &t)
98	}
99}
100
101impl Serialize for Regex {
102	fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
103	where
104		S: Serializer,
105	{
106		serializer.serialize_newtype_struct(TOKEN, self.0.as_str())
107	}
108}
109
110impl<'de> Deserialize<'de> for Regex {
111	fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
112	where
113		D: Deserializer<'de>,
114	{
115		struct RegexNewtypeVisitor;
116
117		impl<'de> Visitor<'de> for RegexNewtypeVisitor {
118			type Value = Regex;
119
120			fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
121				formatter.write_str("a regex newtype")
122			}
123
124			fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
125			where
126				D: Deserializer<'de>,
127			{
128				struct RegexVisitor;
129
130				impl Visitor<'_> for RegexVisitor {
131					type Value = Regex;
132
133					fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
134						formatter.write_str("a regex str")
135					}
136
137					fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
138					where
139						E: de::Error,
140					{
141						Regex::from_str(value).map_err(|_| de::Error::custom("invalid regex"))
142					}
143				}
144
145				deserializer.deserialize_str(RegexVisitor)
146			}
147		}
148
149		deserializer.deserialize_newtype_struct(TOKEN, RegexNewtypeVisitor)
150	}
151}
152
#[cfg(test)]
mod tests {
	use super::regex_new;

	/// A pattern whose compiled program is too large must be rejected with
	/// `CompiledTooBig` carrying the expected limit.
	#[test]
	fn regex_compile_limit() {
		let Err(e) = regex_new("^(a|b|c){1000000}") else {
			panic!("regex should have failed");
		};
		assert!(matches!(e, regex::Error::CompiledTooBig(10_485_760)), "{e}");
	}
}