// cedar_policy_core/parser/unescape.rs
use crate::ast::PatternElem;
use itertools::Itertools;
use miette::Diagnostic;
use nonempty::NonEmpty;
use rustc_lexer::unescape::{unescape_str, EscapeError};
use smol_str::SmolStr;
use std::ops::Range;
use thiserror::Error;
pub fn to_unescaped_string(s: &str) -> Result<SmolStr, NonEmpty<UnescapeError>> {
let mut unescaped_str = String::new();
let mut errs = Vec::new();
let mut callback = |range, r| match r {
Ok(c) => unescaped_str.push(c),
Err(err) => errs.push(UnescapeError {
err,
input: s.to_owned(),
range,
}),
};
unescape_str(s, &mut callback);
if let Some((head, tails)) = errs.split_first() {
Err(NonEmpty {
head: head.clone(),
tail: tails.iter().cloned().collect_vec(),
})
} else {
Ok(unescaped_str.into())
}
}
pub(crate) fn to_pattern(s: &str) -> Result<Vec<PatternElem>, NonEmpty<UnescapeError>> {
let mut unescaped_str = Vec::new();
let mut errs = Vec::new();
let bytes = s.as_bytes(); let mut callback = |range: Range<usize>, r| match r {
Ok(c) => unescaped_str.push(if c == '*' { PatternElem::Wildcard }else { PatternElem::Char(c) }),
#[allow(clippy::indexing_slicing)]
Err(EscapeError::InvalidEscape)
if &bytes[range.start..range.end] == r"\*".as_bytes()
=>
{
unescaped_str.push(PatternElem::Char('*'))
}
Err(err) => errs.push(UnescapeError { err, input: s.to_owned(), range }),
};
unescape_str(s, &mut callback);
if let Some((head, tails)) = errs.split_first() {
Err(NonEmpty {
head: head.clone(),
tail: tails.iter().cloned().collect_vec(),
})
} else {
Ok(unescaped_str)
}
}
/// Error produced when an escape sequence in an input string is rejected
/// during unescaping.
#[derive(Debug, Diagnostic, Error, PartialEq, Eq)]
pub struct UnescapeError {
/// The underlying error reported by `rustc_lexer`'s unescaper.
err: EscapeError,
/// Copy of the full input string that contained the rejected escape.
#[source_code]
input: String,
/// Byte range of the rejected escape within `input`.
/// The `Display` impl slices `input` with this range, so it must lie within
/// `input`.
#[label]
range: Range<usize>,
}
impl Clone for UnescapeError {
    /// Produce a field-by-field copy of this error.
    ///
    /// The `err` field is duplicated through `clone_escape_error` rather than
    /// by calling `.clone()` on it directly.
    fn clone(&self) -> Self {
        let Self { err, input, range } = self;
        Self {
            err: clone_escape_error(err),
            input: input.clone(),
            range: range.clone(),
        }
    }
}
fn clone_escape_error(e: &EscapeError) -> EscapeError {
match e {
EscapeError::ZeroChars => EscapeError::ZeroChars,
EscapeError::MoreThanOneChar => EscapeError::MoreThanOneChar,
EscapeError::LoneSlash => EscapeError::LoneSlash,
EscapeError::InvalidEscape => EscapeError::InvalidEscape,
EscapeError::BareCarriageReturn => EscapeError::BareCarriageReturn,
EscapeError::BareCarriageReturnInRawString => EscapeError::BareCarriageReturnInRawString,
EscapeError::EscapeOnlyChar => EscapeError::EscapeOnlyChar,
EscapeError::TooShortHexEscape => EscapeError::TooShortHexEscape,
EscapeError::InvalidCharInHexEscape => EscapeError::InvalidCharInHexEscape,
EscapeError::OutOfRangeHexEscape => EscapeError::OutOfRangeHexEscape,
EscapeError::NoBraceInUnicodeEscape => EscapeError::NoBraceInUnicodeEscape,
EscapeError::InvalidCharInUnicodeEscape => EscapeError::InvalidCharInUnicodeEscape,
EscapeError::EmptyUnicodeEscape => EscapeError::EmptyUnicodeEscape,
EscapeError::UnclosedUnicodeEscape => EscapeError::UnclosedUnicodeEscape,
EscapeError::LeadingUnderscoreUnicodeEscape => EscapeError::LeadingUnderscoreUnicodeEscape,
EscapeError::OverlongUnicodeEscape => EscapeError::OverlongUnicodeEscape,
EscapeError::LoneSurrogateUnicodeEscape => EscapeError::LoneSurrogateUnicodeEscape,
EscapeError::OutOfRangeUnicodeEscape => EscapeError::OutOfRangeUnicodeEscape,
EscapeError::UnicodeEscapeInByte => EscapeError::UnicodeEscapeInByte,
EscapeError::NonAsciiCharInByte => EscapeError::NonAsciiCharInByte,
EscapeError::NonAsciiCharInByteString => EscapeError::NonAsciiCharInByteString,
}
}
impl std::fmt::Display for UnescapeError {
    /// Write a message quoting the part of `input` covered by `range`.
    // Slicing panics if `range` is not a valid range of `input`; the values
    // constructed in this file take `range` from the `unescape_str` callback
    // for the same string (presumably always in bounds -- confirm if new
    // constructors are added).
    #[allow(clippy::indexing_slicing)]
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let offending = &self.input[self.range.clone()];
        f.write_fmt(format_args!(
            "the input `{}` is not a valid escape",
            offending
        ))
    }
}
#[cfg(test)]
mod test {
    use super::to_unescaped_string;
    use crate::ast;
    use crate::parser::err::{ParseError, ToASTErrorKind};
    use crate::parser::text_to_cst;
    use cool_asserts::assert_matches;

    /// Exercises `to_unescaped_string` on accepted escapes and on inputs that
    /// must yield a specific number of errors.
    #[test]
    fn test_string_escape() {
        // simple escapes and a hex escape are accepted
        let simple = to_unescaped_string(r"\t\r\n\\\0\x42").expect("valid string");
        assert_eq!(simple, "\t\r\n\\\0\x42");

        // an out-of-range hex escape yields exactly one error
        let errs = to_unescaped_string(r"abc\xFFdef").expect_err("should be an invalid escape");
        assert_eq!(errs.len(), 1);

        // unicode escapes of various widths are accepted
        let unicode = to_unescaped_string(r"\u{0}\u{1}\u{1234}\u{12345}\u{054321}\u{123}\u{42}")
            .expect("valid string");
        assert_eq!(
            unicode,
            "\u{000000}\u{001}\u{001234}\u{012345}\u{054321}\u{123}\u{00042}"
        );

        // two invalid unicode escapes yield two errors
        let errs = to_unescaped_string(r"abc\u{1111111}\u{222222222}FFdef")
            .expect_err("should be invalid escapes");
        assert_eq!(errs.len(), 2);

        // `\*` and `\b` are both rejected in plain strings
        let errs = to_unescaped_string(r"abc\*\bdef").expect_err("should be invalid escapes");
        assert_eq!(errs.len(), 2);
    }

    /// Exercises pattern unescaping through the parser, via `like` expressions.
    // Indexing into `errs` is fine here: the length is asserted first.
    #[allow(clippy::indexing_slicing)]
    #[test]
    fn test_pattern_escape() {
        // valid escapes, including the pattern-only `\*`
        let cst = text_to_cst::parse_expr(r#""aa" like "\t\r\n\\\0\x42\*""#)
            .expect("failed parsing");
        let expr = cst.to_expr().expect("failed conversion");
        let expected = format!("{}{}", "\t\r\n\\\0\x42".escape_debug(), r"\*");
        assert!(matches!(
            expr.expr_kind(),
            ast::ExprKind::Like { expr: _, pattern } if pattern.to_string() == expected
        ));

        // two invalid hex escapes yield two unescape errors
        let errs = text_to_cst::parse_expr(r#""abc" like "abc\xFF\xFEdef""#)
            .expect("failed parsing")
            .to_expr()
            .unwrap_err();
        assert_eq!(errs.len(), 2);
        for err in [&errs[0], &errs[1]] {
            assert_matches!(err, ParseError::ToAST(e) => assert_matches!(e.kind(), ToASTErrorKind::Unescape(_)));
        }

        // non-ASCII characters around `\*`
        let cst = text_to_cst::parse_expr(r#""aaa" like "👀👀\*🤞🤞\*🤝""#)
            .expect("failed parsing");
        let expr = cst.to_expr().expect("failed conversion");
        assert!(matches!(
            expr.expr_kind(),
            ast::ExprKind::Like { expr: _, pattern } if pattern.to_string() == *r"👀👀\*🤞🤞\*🤝"
        ));

        // `\d` and `\b` are both rejected in patterns
        let errs = text_to_cst::parse_expr(r#""aaa" like "abc\d\bdef""#)
            .expect("failed parsing")
            .to_expr()
            .unwrap_err();
        assert_eq!(errs.len(), 2);
        for err in [&errs[0], &errs[1]] {
            assert_matches!(err, ParseError::ToAST(e) => assert_matches!(e.kind(), ToASTErrorKind::Unescape(_)));
        }
    }
}