gix_config/value/
normalize.rs

1use std::borrow::Cow;
2
3use bstr::{BStr, BString, ByteSlice};
4
5/// Removes quotes, if any, from the provided inputs, and transforms
6/// the 3 escape sequences `\n`, `\t` and `\b` into newline and tab
7/// respectively, while `\b` will remove the previous character.
8///
9/// It assumes the input contains a even number of unescaped quotes,
10/// and will unescape escaped quotes and everything else (even though the latter
11/// would have been rejected in the parsing stage).
12///
13/// The return values should be safe for value interpretation.
14///
15/// This has optimizations for fully-quoted values, where the returned value
16/// will be a borrowed reference if the only mutation necessary is to unquote
17/// the value.
18///
19/// This is the function used to normalize raw values from higher level
20/// abstractions. Generally speaking these
21/// high level abstractions will handle normalization for you, and you do not
22/// need to call this yourself. However, if you're directly handling events
23/// from the parser, you may want to use this to help with value interpretation.
24///
25/// Generally speaking, you'll want to use one of the variants of this function,
26/// such as [`normalize_bstr`] or [`normalize_bstring`].
27///
28/// # Examples
29///
30/// Values don't need modification are returned borrowed, without allocation.
31///
32/// ```
33/// # use std::borrow::Cow;
34/// # use bstr::ByteSlice;
35/// # use gix_config::value::normalize_bstr;
36/// assert!(matches!(normalize_bstr("hello world"), Cow::Borrowed(_)));
37/// ```
38///
39/// Internally quoted values are turned into owned variant with quotes removed.
40///
41/// ```
42/// # use std::borrow::Cow;
43/// # use bstr::{BStr, BString};
44/// # use gix_config::value::{normalize_bstr};
45/// assert_eq!(normalize_bstr("hello \"world\""), Cow::<BStr>::Owned(BString::from("hello world")));
46/// ```
47///
48/// Escaped quotes are unescaped.
49///
50/// ```
51/// # use std::borrow::Cow;
52/// # use bstr::{BStr, BString};
53/// # use gix_config::value::normalize_bstr;
54/// assert_eq!(normalize_bstr(r#"hello "world\"""#), Cow::<BStr>::Owned(BString::from(r#"hello world""#)));
55/// ```
56#[must_use]
57pub fn normalize(mut input: Cow<'_, BStr>) -> Cow<'_, BStr> {
58    if input.as_ref() == "\"\"" {
59        return Cow::Borrowed("".into());
60    }
61    // An optimization to strip enclosing quotes without producing a new value/copy it.
62    while input.len() >= 3 && input[0] == b'"' && input[input.len() - 1] == b'"' && input[input.len() - 2] != b'\\' {
63        match &mut input {
64            Cow::Borrowed(input) => *input = &input[1..input.len() - 1],
65            Cow::Owned(input) => {
66                input.pop();
67                input.remove(0);
68            }
69        }
70        if input.as_ref() == "\"\"" {
71            return Cow::Borrowed("".into());
72        }
73    }
74
75    if input.find_byteset(br#"\""#).is_none() {
76        return input;
77    }
78    let mut out: BString = Vec::with_capacity(input.len()).into();
79    let mut bytes = input.iter().copied();
80    while let Some(c) = bytes.next() {
81        match c {
82            b'\\' => match bytes.next() {
83                Some(b'n') => out.push(b'\n'),
84                Some(b't') => out.push(b'\t'),
85                Some(b'b') => {
86                    out.pop();
87                }
88                Some(c) => {
89                    out.push(c);
90                }
91                None => break,
92            },
93            b'"' => {}
94            _ => out.push(c),
95        }
96    }
97    Cow::Owned(out)
98}
99
100/// `&[u8]` variant of [`normalize`].
101#[must_use]
102pub fn normalize_bstr<'a>(input: impl Into<&'a BStr>) -> Cow<'a, BStr> {
103    normalize(Cow::Borrowed(input.into()))
104}
105
106/// `Vec[u8]` variant of [`normalize`].
107#[must_use]
108pub fn normalize_bstring(input: impl Into<BString>) -> Cow<'static, BStr> {
109    normalize(Cow::Owned(input.into()))
110}