cynic_parser/common/
strings.rs

1use std::fmt;
2
3use crate::{lexer, parser::AdditionalErrors, Span};
4
/// Strips the `"""` delimiters from a raw block string token.
///
/// The returned slice borrows from `src` and is otherwise untouched: no
/// indentation is removed and no escape sequences are processed.
///
/// # Panics
///
/// Panics if `src` does not start and end with its own `"""` delimiter.
pub(crate) fn unquote_block_string(src: &str) -> &str {
    const DELIMITER: &str = "\"\"\"";

    // Stripping the prefix before looking for the suffix guarantees the two
    // delimiters cannot overlap. An input such as `""""` satisfies a plain
    // `starts_with && ends_with` check but is not a complete block string, and
    // slicing it by fixed offsets would fail with an inverted range instead of
    // a message that names the broken precondition.
    src.strip_prefix(DELIMITER)
        .and_then(|rest| rest.strip_suffix(DELIMITER))
        .expect("block string must be wrapped in `\"\"\"` delimiters")
}
10
/// Applies the GraphQL block-string whitespace rules to the contents of a block string.
///
/// `src` is the text between the `"""` delimiters (the delimiters themselves
/// must already have been removed). The result has:
///
/// - leading and trailing lines that contain only whitespace removed,
/// - the common indentation of every line except the first removed, and
/// - escaped triple quotes (`\"""`) replaced with `"""`.
///
/// Lines are split on `\n` / `\r\n` and re-joined with `\n`. An input without
/// any non-whitespace content yields an empty string.
pub(crate) fn trim_block_string_whitespace(src: &str) -> String {
    const ESCAPED_TRIPLE_QUOTE: &str = "\\\"\"\"";
    const TRIPLE_QUOTE: &str = "\"\"\"";

    /// Byte offset of the first character that is not GraphQL whitespace, or
    /// `None` when the line is empty or whitespace-only.
    ///
    /// GraphQL only treats space and horizontal tab as whitespace. Restricting
    /// the check to those two ASCII characters also guarantees that every
    /// offset up to the returned indent is a valid `char` boundary.
    fn indent_of(line: &str) -> Option<usize> {
        line.find(|c: char| c != ' ' && c != '\t')
    }

    let lines = src.lines().collect::<Vec<_>>();

    let mut common_indent = usize::MAX;
    let mut first_non_empty_line: Option<usize> = None;
    let mut last_non_empty_line = 0;
    for (idx, line) in lines.iter().enumerate() {
        let Some(indent) = indent_of(line) else {
            // Blank lines never contribute to the common indent.
            continue;
        };

        first_non_empty_line.get_or_insert(idx);
        last_non_empty_line = idx;

        // The first line sits right after the opening delimiter, so its
        // indentation is not representative and is ignored.
        if idx != 0 {
            common_indent = common_indent.min(indent);
        }
    }

    let Some(first_non_empty_line) = first_non_empty_line else {
        // The block string contains only whitespace.
        return String::new();
    };

    // The output is never longer than the input: indentation is only removed
    // and each escaped triple quote shrinks by one byte.
    let mut result = String::with_capacity(src.len());

    // Skip leading and trailing blank lines.
    let kept_lines = lines
        .iter()
        .copied()
        .enumerate()
        .skip(first_non_empty_line)
        .take(last_non_empty_line - first_non_empty_line + 1);

    for (idx, line) in kept_lines {
        if idx != first_non_empty_line {
            result.push('\n');
        }

        // Remove the indent, except on the first line. Lines shorter than the
        // common indent can only be blank lines, which collapse to nothing.
        let line = if idx == 0 {
            line
        } else {
            line.get(common_indent..).unwrap_or("")
        };

        // Unescape `\"""` while copying, without allocating per line.
        let mut segments = line.split(ESCAPED_TRIPLE_QUOTE);
        if let Some(head) = segments.next() {
            result.push_str(head);
        }
        for segment in segments {
            result.push_str(TRIPLE_QUOTE);
            result.push_str(segment);
        }
    }

    result
}
71
72pub(crate) fn unquote_string(s: &str, start_span: usize) -> Result<String, MalformedStringError> {
73    let mut res = String::with_capacity(s.len());
74    assert!(s.starts_with('"') && s.ends_with('"'));
75
76    let mut chars = s[1..s.len() - 1].char_indices();
77
78    // Count the '"' in our span
79    let start_span = start_span + 1;
80
81    let mut temp_code_point = String::with_capacity(4);
82    while let Some((index, c)) = chars.next() {
83        match c {
84            '\\' => {
85                match chars.next().expect("slash cant be at the end") {
86                    (_, c @ '"' | c @ '\\' | c @ '/') => res.push(c),
87                    (_, 'b') => res.push('\u{0010}'),
88                    (_, 'f') => res.push('\u{000C}'),
89                    (_, 'n') => res.push('\n'),
90                    (_, 'r') => res.push('\r'),
91                    (_, 't') => res.push('\t'),
92                    (_, 'u') => {
93                        temp_code_point.clear();
94                        let mut end_index = index;
95                        for _ in 0..4 {
96                            match chars.next() {
97                                Some((index, inner_c)) => {
98                                    temp_code_point.push(inner_c);
99                                    end_index = index;
100                                }
101                                None => {
102                                    return Err(MalformedStringError::MalformedCodePoint(
103                                        temp_code_point,
104                                        index + start_span,
105                                        end_index + start_span,
106                                    ));
107                                }
108                            }
109                        }
110
111                        // convert our hex string into a u32, then convert that into a char
112                        match u32::from_str_radix(&temp_code_point, 16).map(std::char::from_u32) {
113                            Ok(Some(unicode_char)) => res.push(unicode_char),
114                            _ => {
115                                return Err(MalformedStringError::UnknownCodePoint(
116                                    temp_code_point,
117                                    index + start_span,
118                                    end_index + start_span,
119                                ));
120                            }
121                        }
122                    }
123                    (end_index, c) => {
124                        return Err(MalformedStringError::UnknownEscapeChar(
125                            c,
126                            index + start_span,
127                            end_index + start_span,
128                        ));
129                    }
130                }
131            }
132            c => res.push(c),
133        }
134    }
135
136    Ok(res)
137}
138
/// An error found while unescaping a quoted string literal.
///
/// Every variant carries the offending text followed by a start and an end
/// offset; [`MalformedStringError::span`] turns those two offsets into a `Span`.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum MalformedStringError {
    /// A `\u` escape ended before four characters could be read. Holds the
    /// characters that were read.
    MalformedCodePoint(String, usize, usize),
    /// The four characters after `\u` do not name a valid code point. Holds
    /// those four characters.
    UnknownCodePoint(String, usize, usize),
    /// A backslash was followed by a character that is not a known escape.
    /// Holds that character.
    UnknownEscapeChar(char, usize, usize),
}
145
146impl MalformedStringError {
147    pub fn span(&self) -> Span {
148        let (start, end) = match self {
149            MalformedStringError::MalformedCodePoint(_, start, end) => (start, end),
150            MalformedStringError::UnknownCodePoint(_, start, end) => (start, end),
151            MalformedStringError::UnknownEscapeChar(_, start, end) => (start, end),
152        };
153
154        Span::new(*start, *end)
155    }
156}
157
158impl std::error::Error for MalformedStringError {}
159
160impl fmt::Display for MalformedStringError {
161    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
162        match self {
163            MalformedStringError::MalformedCodePoint(code_point, _, _) => {
164                write!(
165                    f,
166                    "\\u must have 4 characters after it, only found '{code_point}'"
167                )
168            }
169            MalformedStringError::UnknownCodePoint(code_point, _, _) => {
170                write!(f, "{code_point} is not a valid unicode code point",)
171            }
172            MalformedStringError::UnknownEscapeChar(char, _, _) => {
173                write!(f, "unknown escape character {char}")
174            }
175        }
176    }
177}
178
179impl From<MalformedStringError>
180    for lalrpop_util::ParseError<usize, lexer::Token<'static>, AdditionalErrors>
181{
182    fn from(value: MalformedStringError) -> Self {
183        lalrpop_util::ParseError::User {
184            error: AdditionalErrors::MalformedString(value),
185        }
186    }
187}
188
#[cfg(test)]
mod tests {
    use super::trim_block_string_whitespace;

    /// Output expected from both inputs below: common indent removed, blank
    /// leading/trailing lines dropped, relative indent preserved.
    const TRIMMED: &str = "Hello there you fool\n\nI am a thing\n\n    I am indented";

    #[test]
    fn test_block_string_trim() {
        // Text begins on the same line as the opening delimiter, so the first
        // line has no indentation while the remaining lines do.
        let starts_on_first_line = concat!(
            "Hello there you fool\n",
            "\n",
            "            I am a thing\n",
            "\n",
            "                I am indented\n",
            "            ",
        );
        assert_eq!(trim_block_string_whitespace(starts_on_first_line), TRIMMED);

        // Text begins on the line after the opening delimiter, so every line
        // of content shares the same base indentation.
        let starts_on_second_line = concat!(
            "\n",
            "            Hello there you fool\n",
            "\n",
            "            I am a thing\n",
            "\n",
            "                I am indented\n",
            "            ",
        );
        assert_eq!(trim_block_string_whitespace(starts_on_second_line), TRIMMED);
    }
}
220}