gix_config/value/normalize.rs
1use std::borrow::Cow;
2
3use bstr::{BStr, BString, ByteSlice};
4
5/// Removes quotes, if any, from the provided inputs, and transforms
6/// the 3 escape sequences `\n`, `\t` and `\b` into newline and tab
7/// respectively, while `\b` will remove the previous character.
8///
9/// It assumes the input contains a even number of unescaped quotes,
10/// and will unescape escaped quotes and everything else (even though the latter
11/// would have been rejected in the parsing stage).
12///
13/// The return values should be safe for value interpretation.
14///
15/// This has optimizations for fully-quoted values, where the returned value
16/// will be a borrowed reference if the only mutation necessary is to unquote
17/// the value.
18///
19/// This is the function used to normalize raw values from higher level
20/// abstractions. Generally speaking these
21/// high level abstractions will handle normalization for you, and you do not
22/// need to call this yourself. However, if you're directly handling events
23/// from the parser, you may want to use this to help with value interpretation.
24///
25/// Generally speaking, you'll want to use one of the variants of this function,
26/// such as [`normalize_bstr`] or [`normalize_bstring`].
27///
28/// # Examples
29///
30/// Values don't need modification are returned borrowed, without allocation.
31///
32/// ```
33/// # use std::borrow::Cow;
34/// # use bstr::ByteSlice;
35/// # use gix_config::value::normalize_bstr;
36/// assert!(matches!(normalize_bstr("hello world"), Cow::Borrowed(_)));
37/// ```
38///
39/// Internally quoted values are turned into owned variant with quotes removed.
40///
41/// ```
42/// # use std::borrow::Cow;
43/// # use bstr::{BStr, BString};
44/// # use gix_config::value::{normalize_bstr};
45/// assert_eq!(normalize_bstr("hello \"world\""), Cow::<BStr>::Owned(BString::from("hello world")));
46/// ```
47///
48/// Escaped quotes are unescaped.
49///
50/// ```
51/// # use std::borrow::Cow;
52/// # use bstr::{BStr, BString};
53/// # use gix_config::value::normalize_bstr;
54/// assert_eq!(normalize_bstr(r#"hello "world\"""#), Cow::<BStr>::Owned(BString::from(r#"hello world""#)));
55/// ```
56#[must_use]
57pub fn normalize(mut input: Cow<'_, BStr>) -> Cow<'_, BStr> {
58 if input.as_ref() == "\"\"" {
59 return Cow::Borrowed("".into());
60 }
61 // An optimization to strip enclosing quotes without producing a new value/copy it.
62 while input.len() >= 3 && input[0] == b'"' && input[input.len() - 1] == b'"' && input[input.len() - 2] != b'\\' {
63 match &mut input {
64 Cow::Borrowed(input) => *input = &input[1..input.len() - 1],
65 Cow::Owned(input) => {
66 input.pop();
67 input.remove(0);
68 }
69 }
70 if input.as_ref() == "\"\"" {
71 return Cow::Borrowed("".into());
72 }
73 }
74
75 if input.find_byteset(br#"\""#).is_none() {
76 return input;
77 }
78 let mut out: BString = Vec::with_capacity(input.len()).into();
79 let mut bytes = input.iter().copied();
80 while let Some(c) = bytes.next() {
81 match c {
82 b'\\' => match bytes.next() {
83 Some(b'n') => out.push(b'\n'),
84 Some(b't') => out.push(b'\t'),
85 Some(b'b') => {
86 out.pop();
87 }
88 Some(c) => {
89 out.push(c);
90 }
91 None => break,
92 },
93 b'"' => {}
94 _ => out.push(c),
95 }
96 }
97 Cow::Owned(out)
98}
99
100/// `&[u8]` variant of [`normalize`].
101#[must_use]
102pub fn normalize_bstr<'a>(input: impl Into<&'a BStr>) -> Cow<'a, BStr> {
103 normalize(Cow::Borrowed(input.into()))
104}
105
106/// `Vec[u8]` variant of [`normalize`].
107#[must_use]
108pub fn normalize_bstring(input: impl Into<BString>) -> Cow<'static, BStr> {
109 normalize(Cow::Owned(input.into()))
110}