protobuf_support/lexer/
str_lit.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
use std::fmt;
use std::string::FromUtf8Error;

use crate::lexer::lexer_impl::Lexer;
use crate::lexer::parser_language::ParserLanguage;

#[derive(Debug, thiserror::Error)]
pub enum StrLitDecodeError {
    #[error(transparent)]
    FromUtf8Error(#[from] FromUtf8Error),
    #[error("String literal decode error")]
    OtherError,
}

pub type StrLitDecodeResult<T> = Result<T, StrLitDecodeError>;

/// String literal, both `string` and `bytes`.
#[derive(Clone, Eq, PartialEq, Debug)]
pub struct StrLit {
    pub escaped: String,
}

impl fmt::Display for StrLit {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "\"{}\"", &self.escaped)
    }
}

impl StrLit {
    /// May fail if not valid UTF8
    pub fn decode_utf8(&self) -> StrLitDecodeResult<String> {
        let mut lexer = Lexer::new(&self.escaped, ParserLanguage::Json);
        let mut r = Vec::new();
        while !lexer.eof() {
            r.extend(
                lexer
                    .next_str_lit_bytes()
                    .map_err(|_| StrLitDecodeError::OtherError)?
                    .bytes(),
            );
        }
        Ok(String::from_utf8(r)?)
    }

    pub fn decode_bytes(&self) -> StrLitDecodeResult<Vec<u8>> {
        let mut lexer = Lexer::new(&self.escaped, ParserLanguage::Json);
        let mut r = Vec::new();
        while !lexer.eof() {
            r.extend(
                lexer
                    .next_str_lit_bytes()
                    .map_err(|_| StrLitDecodeError::OtherError)?
                    .bytes(),
            );
        }
        Ok(r)
    }

    pub fn quoted(&self) -> String {
        format!("\"{}\"", self.escaped)
    }
}

#[cfg(test)]
mod test {
    use crate::lexer::str_lit::StrLit;

    #[test]
    fn decode_utf8() {
        assert_eq!(
            "\u{1234}".to_owned(),
            StrLit {
                escaped: "\\341\\210\\264".to_owned()
            }
            .decode_utf8()
            .unwrap()
        )
    }
}