objc2_encode/
parse.rs

1//! Parsing encodings from their string representation.
2#![deny(unsafe_code)]
3use alloc::boxed::Box;
4use alloc::string::{String, ToString};
5use alloc::vec::Vec;
6use core::fmt;
7
8use crate::helper::{ContainerKind, EncodingType, Helper, NestingLevel, Primitive};
9use crate::{Encoding, EncodingBox};
10
/// Report whether `name` is acceptable as a struct or union name.
///
/// The lone string `?` is always accepted. Any other name must be non-empty
/// and consist solely of ASCII letters, ASCII digits and underscores.
pub(crate) const fn verify_name(name: &str) -> bool {
    // Handle the two special shapes up front: the `?` marker and the empty
    // string.
    match name.as_bytes() {
        [b'?'] => return true,
        [] => return false,
        _ => {}
    }

    // Iterators are not usable in `const fn`, so peel the bytes off one at a
    // time with a slice pattern instead.
    let mut rest = name.as_bytes();
    while let [byte, tail @ ..] = rest {
        if !matches!(*byte, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_') {
            return false;
        }
        rest = tail;
    }
    true
}
33
/// The error that was encountered while parsing an encoding string.
///
/// Besides the kind of failure, this stores an owned copy of the input and
/// the parser's byte position, both of which are shown by the `Display`
/// implementation.
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ParseError {
    /// What went wrong.
    kind: ErrorKind,
    /// Owned copy of the complete string that was being parsed.
    data: String,
    /// Byte index into `data` at which the parser stood when the error was
    /// created.
    split_point: usize,
}
41
42impl ParseError {
43    pub(crate) fn new(parser: Parser<'_>, kind: ErrorKind) -> Self {
44        Self {
45            kind,
46            data: parser.data.to_string(),
47            split_point: parser.split_point,
48        }
49    }
50}
51
52impl fmt::Display for ParseError {
53    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
54        write!(
55            f,
56            "failed parsing encoding: {} at byte-index {} in {:?}",
57            self.kind, self.split_point, self.data,
58        )
59    }
60}
61
// Only compiled when the `std` feature is enabled; the trait's default
// methods are used as-is.
#[cfg(feature = "std")]
impl std::error::Error for ParseError {}
64
/// The specific reason a parse step failed.
#[derive(Debug, PartialEq, Eq, Hash)]
pub(crate) enum ErrorKind {
    /// The input ended where at least one more byte was required.
    UnexpectedEnd,
    /// The given byte does not start an encoding, or is a closing byte
    /// (`]`, `}`, `)`) in a position where it is not accepted.
    Unknown(u8),
    /// The given byte followed the complex marker `j`, but is not one of
    /// `f`, `d` or `D`.
    UnknownAfterComplex(u8),
    /// A decimal digit was required, but a different byte was found.
    ExpectedInteger,
    /// The parsed digits do not fit in the integer type that was requested.
    IntegerTooLarge,
    /// An array was not terminated by `]`.
    WrongEndArray,
    /// The input ended before the container's closing byte was found.
    WrongEndContainer(ContainerKind),
    /// The container's name was rejected by `verify_name`.
    InvalidIdentifier(ContainerKind),
    /// Input was left over where the parser was expected to be exhausted.
    NotAllConsumed,
}
77
78impl fmt::Display for ErrorKind {
79    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
80        match self {
81            Self::UnexpectedEnd => write!(f, "unexpected end"),
82            Self::Unknown(b) => {
83                write!(f, "unknown encoding character {}", *b as char)
84            }
85            Self::UnknownAfterComplex(b) => {
86                write!(f, "unknown encoding character {} after complex", *b as char,)
87            }
88            Self::ExpectedInteger => write!(f, "expected integer"),
89            Self::IntegerTooLarge => write!(f, "integer too large"),
90            Self::WrongEndArray => write!(f, "expected array to be closed"),
91            Self::WrongEndContainer(kind) => {
92                write!(f, "expected {kind} to be closed")
93            }
94            Self::InvalidIdentifier(kind) => {
95                write!(f, "got invalid identifier in {kind}")
96            }
97            Self::NotAllConsumed => {
98                write!(f, "remaining contents after parsing")
99            }
100        }
101    }
102}
103
/// Module-local result alias whose error type defaults to `ErrorKind`.
type Result<T, E = ErrorKind> = core::result::Result<T, E>;
105
/// Outcome of a single `Parser::parse_inner` step.
enum ParseInner {
    /// There was no input left to parse.
    Empty,
    /// One complete encoding was parsed.
    Encoding(EncodingBox),
    /// The closing byte of a struct (`}`) or union (`)`) was consumed.
    ContainerEnd(ContainerKind),
    /// The closing byte of an array (`]`) was consumed.
    ArrayEnd,
}
112
/// Byte-wise cursor over an encoding string.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub(crate) struct Parser<'a> {
    /// The complete input string.
    data: &'a str,
    // Byte index into `data` of the byte that is examined next.
    // Always "behind"/"at" the current character
    split_point: usize,
}
119
120impl<'a> Parser<'a> {
121    pub(crate) fn new(data: &'a str) -> Self {
122        Self {
123            split_point: 0,
124            data,
125        }
126    }
127
128    pub(crate) fn remaining(&self) -> &'a str {
129        &self.data[self.split_point..]
130    }
131
132    fn peek(&self) -> Result<u8> {
133        self.try_peek().ok_or(ErrorKind::UnexpectedEnd)
134    }
135
136    fn try_peek(&self) -> Option<u8> {
137        self.data.as_bytes().get(self.split_point).copied()
138    }
139
140    fn try_peek2(&self) -> Option<(u8, u8)> {
141        let bytes = self.data.as_bytes();
142        Some((
143            *bytes.get(self.split_point)?,
144            *bytes.get(self.split_point + 1)?,
145        ))
146    }
147
148    fn advance(&mut self) {
149        self.split_point += 1;
150    }
151
152    fn rollback(&mut self) {
153        self.split_point -= 1;
154    }
155
156    fn consume_while(&mut self, mut condition: impl FnMut(u8) -> bool) {
157        while let Some(b) = self.try_peek() {
158            if condition(b) {
159                self.advance();
160            } else {
161                break;
162            }
163        }
164    }
165
166    pub(crate) fn is_empty(&self) -> bool {
167        self.try_peek().is_none()
168    }
169
170    pub(crate) fn expect_empty(&self) -> Result<()> {
171        if self.is_empty() {
172            Ok(())
173        } else {
174            Err(ErrorKind::NotAllConsumed)
175        }
176    }
177}
178
179impl Parser<'_> {
180    /// Strip leading qualifiers, if any.
181    pub(crate) fn strip_leading_qualifiers(&mut self) {
182        // TODO: Add API for accessing and outputting qualifiers.
183        #[allow(clippy::byte_char_slices)]
184        const QUALIFIERS: &[u8] = &[
185            b'r', // const
186            b'n', // in
187            b'N', // inout
188            b'o', // out
189            b'O', // bycopy
190            b'R', // byref
191            b'V', // oneway
192        ];
193        // TODO: b'|', // GCINVISIBLE
194
195        self.consume_while(|b| QUALIFIERS.contains(&b));
196    }
197
198    /// Chomp until we hit a non-digit.
199    ///
200    /// + and - prefixes are not supported.
201    fn chomp_digits(&mut self) -> Result<&str> {
202        let old_split_point = self.split_point;
203
204        // Parse first digit (which must be present).
205        if !self.peek()?.is_ascii_digit() {
206            return Err(ErrorKind::ExpectedInteger);
207        }
208
209        // Parse the rest, stopping if we hit a non-digit.
210        self.consume_while(|b| b.is_ascii_digit());
211
212        Ok(&self.data[old_split_point..self.split_point])
213    }
214
215    fn parse_u64(&mut self) -> Result<u64> {
216        self.chomp_digits()?
217            .parse()
218            .map_err(|_| ErrorKind::IntegerTooLarge)
219    }
220
221    fn parse_u8(&mut self) -> Result<u8> {
222        self.chomp_digits()?
223            .parse()
224            .map_err(|_| ErrorKind::IntegerTooLarge)
225    }
226}
227
228/// Check if the data matches an expected value.
229///
230/// The errors here aren't currently used, so they're hackily set up.
231impl Parser<'_> {
232    fn expect_byte(&mut self, byte: u8) -> Option<()> {
233        if self.try_peek()? == byte {
234            self.advance();
235            Some(())
236        } else {
237            None
238        }
239    }
240
241    fn expect_one_of_str<'a>(&mut self, strings: impl IntoIterator<Item = &'a str>) -> Option<()> {
242        for s in strings {
243            if self.remaining().starts_with(s) {
244                for b in s.as_bytes() {
245                    self.expect_byte(*b).unwrap();
246                }
247                return Some(());
248            }
249        }
250        None
251    }
252
253    fn expect_u64(&mut self, int: u64) -> Option<()> {
254        if self.parse_u64().ok()? == int {
255            Some(())
256        } else {
257            None
258        }
259    }
260
261    fn expect_u8(&mut self, int: u8) -> Option<()> {
262        if self.parse_u8().ok()? == int {
263            Some(())
264        } else {
265            None
266        }
267    }
268
269    pub(crate) fn expect_encoding(&mut self, enc: &Encoding, level: NestingLevel) -> Option<()> {
270        match enc.helper() {
271            Helper::Primitive(primitive) => {
272                self.expect_one_of_str(primitive.equivalents().iter().map(|p| p.to_str()))?;
273
274                if primitive == Primitive::Object && self.try_peek() == Some(b'"') {
275                    self.advance();
276                    self.consume_while(|b| b != b'"');
277                    self.expect_byte(b'"')?;
278                }
279                Some(())
280            }
281            Helper::BitField(size, Some((offset, t))) => {
282                self.expect_byte(b'b')?;
283                self.expect_u64(*offset)?;
284                self.expect_encoding(t, level.bitfield())?;
285                self.expect_u8(size)
286            }
287            Helper::BitField(size, None) => {
288                self.expect_byte(b'b')?;
289                self.expect_u8(size)
290            }
291            Helper::Indirection(kind, t) => {
292                self.expect_byte(kind.prefix_byte())?;
293                self.expect_encoding(t, level.indirection(kind))
294            }
295            Helper::Array(len, item) => {
296                self.expect_byte(b'[')?;
297                self.expect_u64(len)?;
298                self.expect_encoding(item, level.array())?;
299                self.expect_byte(b']')
300            }
301            Helper::Container(kind, name, items) => {
302                self.expect_byte(kind.start_byte())?;
303                self.expect_one_of_str([name])?;
304                if let Some(level) = level.container_include_fields() {
305                    self.expect_byte(b'=')?;
306                    // Parse as equal if the container is empty
307                    if items.is_empty() {
308                        loop {
309                            match self.parse_inner().ok()? {
310                                ParseInner::Empty => {
311                                    // Require the container to have an end
312                                    return None;
313                                }
314                                ParseInner::Encoding(_) => {}
315                                ParseInner::ContainerEnd(parsed_kind) => {
316                                    if parsed_kind == kind {
317                                        return Some(());
318                                    } else {
319                                        return None;
320                                    }
321                                }
322                                ParseInner::ArrayEnd => {
323                                    return None;
324                                }
325                            }
326                        }
327                    }
328                    // Parse as equal if the string's container is empty
329                    if self.try_peek() == Some(kind.end_byte()) {
330                        self.advance();
331                        return Some(());
332                    }
333                    for item in items {
334                        self.expect_encoding(item, level)?;
335                    }
336                }
337                self.expect_byte(kind.end_byte())
338            }
339            Helper::NoneInvalid => Some(()),
340        }
341    }
342}
343
344impl Parser<'_> {
345    fn parse_container(&mut self, kind: ContainerKind) -> Result<(&str, Vec<EncodingBox>)> {
346        let old_split_point = self.split_point;
347
348        // Parse name until hits `=` or `}`/`)`
349        let has_items = loop {
350            let b = self.try_peek().ok_or(ErrorKind::WrongEndContainer(kind))?;
351            if b == b'=' {
352                break true;
353            } else if b == kind.end_byte() {
354                break false;
355            }
356            self.advance();
357        };
358
359        let s = &self.data[old_split_point..self.split_point];
360
361        if !verify_name(s) {
362            return Err(ErrorKind::InvalidIdentifier(kind));
363        }
364
365        if has_items {
366            self.advance();
367        }
368
369        let mut items = Vec::new();
370        // Parse items until hits end
371        loop {
372            match self.parse_inner()? {
373                ParseInner::Empty => {
374                    return Err(ErrorKind::WrongEndContainer(kind));
375                }
376                ParseInner::Encoding(enc) => {
377                    items.push(enc);
378                }
379                ParseInner::ContainerEnd(parsed_kind) => {
380                    if parsed_kind == kind {
381                        return Ok((s, items));
382                    } else {
383                        return Err(ErrorKind::Unknown(parsed_kind.end_byte()));
384                    }
385                }
386                ParseInner::ArrayEnd => {
387                    return Err(ErrorKind::Unknown(b']'));
388                }
389            }
390        }
391    }
392
393    pub(crate) fn parse_encoding_or_none(&mut self) -> Result<EncodingBox> {
394        match self.parse_inner()? {
395            ParseInner::Empty => Ok(EncodingBox::None),
396            ParseInner::Encoding(enc) => Ok(enc),
397            ParseInner::ContainerEnd(kind) => Err(ErrorKind::Unknown(kind.end_byte())),
398            ParseInner::ArrayEnd => Err(ErrorKind::Unknown(b']')),
399        }
400    }
401
402    fn parse_inner(&mut self) -> Result<ParseInner> {
403        if self.is_empty() {
404            return Ok(ParseInner::Empty);
405        }
406        let b = self.peek()?;
407        self.advance();
408
409        Ok(ParseInner::Encoding(match b {
410            b'c' => EncodingBox::Char,
411            b's' => EncodingBox::Short,
412            b'i' => EncodingBox::Int,
413            b'l' => EncodingBox::Long,
414            b'q' => EncodingBox::LongLong,
415            b'C' => EncodingBox::UChar,
416            b'S' => EncodingBox::UShort,
417            b'I' => EncodingBox::UInt,
418            b'L' => EncodingBox::ULong,
419            b'Q' => EncodingBox::ULongLong,
420            b'f' => EncodingBox::Float,
421            b'd' => EncodingBox::Double,
422            b'D' => EncodingBox::LongDouble,
423            b'j' => {
424                let res = match self.peek()? {
425                    b'f' => EncodingBox::FloatComplex,
426                    b'd' => EncodingBox::DoubleComplex,
427                    b'D' => EncodingBox::LongDoubleComplex,
428                    b => return Err(ErrorKind::UnknownAfterComplex(b)),
429                };
430                self.advance();
431                res
432            }
433            b'B' => EncodingBox::Bool,
434            b'v' => EncodingBox::Void,
435            b'*' => EncodingBox::String,
436            b'@' => match self.try_peek() {
437                // Special handling for blocks
438                Some(b'?') => {
439                    self.advance();
440                    EncodingBox::Block
441                }
442                // Parse class name if present
443                Some(b'"') => {
444                    self.advance();
445                    self.consume_while(|b| b != b'"');
446                    self.expect_byte(b'"').ok_or(ErrorKind::UnexpectedEnd)?;
447                    EncodingBox::Object
448                }
449                _ => EncodingBox::Object,
450            },
451            b'#' => EncodingBox::Class,
452            b':' => EncodingBox::Sel,
453            b'?' => EncodingBox::Unknown,
454
455            b'b' => {
456                let size_or_offset = self.parse_u64()?;
457                if let Some((size, ty)) = self.try_parse_bitfield_gnustep()? {
458                    let offset = size_or_offset;
459                    EncodingBox::BitField(size, Some(Box::new((offset, ty))))
460                } else {
461                    let size = size_or_offset
462                        .try_into()
463                        .map_err(|_| ErrorKind::IntegerTooLarge)?;
464                    EncodingBox::BitField(size, None)
465                }
466            }
467            b'^' => EncodingBox::Pointer(Box::new(match self.parse_inner()? {
468                ParseInner::Empty => EncodingBox::None,
469                ParseInner::Encoding(enc) => enc,
470                ParseInner::ContainerEnd(_) | ParseInner::ArrayEnd => {
471                    self.rollback();
472                    EncodingBox::None
473                }
474            })),
475            b'A' => EncodingBox::Atomic(Box::new(match self.parse_inner()? {
476                ParseInner::Empty => EncodingBox::None,
477                ParseInner::Encoding(enc) => enc,
478                ParseInner::ContainerEnd(_) | ParseInner::ArrayEnd => {
479                    self.rollback();
480                    EncodingBox::None
481                }
482            })),
483            b'[' => {
484                let len = self.parse_u64()?;
485                match self.parse_inner()? {
486                    ParseInner::Empty => {
487                        return Err(ErrorKind::WrongEndArray);
488                    }
489                    ParseInner::Encoding(item) => {
490                        self.expect_byte(b']').ok_or(ErrorKind::WrongEndArray)?;
491                        EncodingBox::Array(len, Box::new(item))
492                    }
493                    ParseInner::ArrayEnd => EncodingBox::Array(len, Box::new(EncodingBox::None)),
494                    ParseInner::ContainerEnd(kind) => {
495                        return Err(ErrorKind::Unknown(kind.end_byte()))
496                    }
497                }
498            }
499            b']' => {
500                return Ok(ParseInner::ArrayEnd);
501            }
502            b'{' => {
503                let kind = ContainerKind::Struct;
504                let (name, items) = self.parse_container(kind)?;
505                EncodingBox::Struct(name.to_string(), items)
506            }
507            b'}' => {
508                return Ok(ParseInner::ContainerEnd(ContainerKind::Struct));
509            }
510            b'(' => {
511                let kind = ContainerKind::Union;
512                let (name, items) = self.parse_container(kind)?;
513                EncodingBox::Union(name.to_string(), items)
514            }
515            b')' => {
516                return Ok(ParseInner::ContainerEnd(ContainerKind::Union));
517            }
518            b => return Err(ErrorKind::Unknown(b)),
519        }))
520    }
521
522    fn try_parse_bitfield_gnustep(&mut self) -> Result<Option<(u8, EncodingBox)>> {
523        if let Some((b1, b2)) = self.try_peek2() {
524            // Try to parse the encoding.
525            //
526            // The encoding is always an integral type.
527            let ty = match b1 {
528                b'c' => EncodingBox::Char,
529                b's' => EncodingBox::Short,
530                b'i' => EncodingBox::Int,
531                b'l' => EncodingBox::Long,
532                b'q' => EncodingBox::LongLong,
533                b'C' => EncodingBox::UChar,
534                b'S' => EncodingBox::UShort,
535                b'I' => EncodingBox::UInt,
536                b'L' => EncodingBox::ULong,
537                b'Q' => EncodingBox::ULongLong,
538                b'B' => EncodingBox::Bool,
539                _ => return Ok(None),
540            };
541            // And then check if a digit follows that (which the size would
542            // always contain).
543            if !b2.is_ascii_digit() {
544                return Ok(None);
545            }
546            // We have a size; so let's advance...
547            self.advance();
548            // ...and parse it for real.
549            let size = self.parse_u8()?;
550            Ok(Some((size, ty)))
551        } else {
552            Ok(None)
553        }
554    }
555}
556
// Unit tests for the parser internals.
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec;

    /// `parse_container` is fed the text that follows the opening `{`.
    #[test]
    fn parse_container() {
        const KIND: ContainerKind = ContainerKind::Struct;

        #[track_caller]
        fn assert_name(enc: &str, expected: Result<(&str, Vec<EncodingBox>)>) {
            let mut parser = Parser::new(enc);
            assert_eq!(parser.parse_container(KIND), expected);
        }

        // Well-formed containers; anything after the closing byte is left
        // unparsed.
        assert_name("abc=}", Ok(("abc", vec![])));
        assert_name(
            "abc=ii}",
            Ok(("abc", vec![EncodingBox::Int, EncodingBox::Int])),
        );
        assert_name("_=}.a'", Ok(("_", vec![])));
        assert_name("abc}def", Ok(("abc", vec![])));
        // Empty or otherwise invalid names.
        assert_name("=def}", Err(ErrorKind::InvalidIdentifier(KIND)));
        assert_name(".=def}", Err(ErrorKind::InvalidIdentifier(KIND)));
        assert_name("}xyz", Err(ErrorKind::InvalidIdentifier(KIND)));
        // The input ends before `=` or the struct's closing byte is seen.
        assert_name("", Err(ErrorKind::WrongEndContainer(KIND)));
        assert_name("abc", Err(ErrorKind::WrongEndContainer(KIND)));
        assert_name("abc)def", Err(ErrorKind::WrongEndContainer(KIND)));
    }

    /// Bit-fields with only a size, and with an offset, a type and a size.
    #[test]
    fn parse_bitfield() {
        // Parses one encoding and additionally requires that the whole input
        // was consumed.
        #[track_caller]
        fn assert_bitfield(enc: &str, expected: Result<EncodingBox>) {
            let mut parser = Parser::new(enc);
            assert_eq!(
                parser
                    .parse_encoding_or_none()
                    .and_then(|enc| parser.expect_empty().map(|()| enc)),
                expected
            );
        }

        assert_bitfield("b8", Ok(EncodingBox::BitField(8, None)));
        // No digit follows the `C`, so it is not taken as part of the
        // bit-field and is left over.
        assert_bitfield("b8C", Err(ErrorKind::NotAllConsumed));
        assert_bitfield(
            "b8C4",
            Ok(EncodingBox::BitField(
                4,
                Some(Box::new((8, EncodingBox::UChar))),
            )),
        );

        // Inside a struct, the same `C` is parsed as a separate field.
        assert_bitfield(
            "{s=b8C}",
            Ok(EncodingBox::Struct(
                "s".into(),
                vec![EncodingBox::BitField(8, None), EncodingBox::UChar],
            )),
        );

        // A size has to fit in a `u8`, while an offset may be larger.
        assert_bitfield("b2000", Err(ErrorKind::IntegerTooLarge));
        assert_bitfield(
            "b2000c100",
            Ok(EncodingBox::BitField(
                100,
                Some(Box::new((2000, EncodingBox::Char))),
            )),
        );
        assert_bitfield("b2000C257", Err(ErrorKind::IntegerTooLarge));
    }

    /// A closing byte on its own is reported as an unknown character.
    #[test]
    fn parse_closing() {
        let mut parser = Parser::new("]");
        assert_eq!(
            parser.parse_encoding_or_none(),
            Err(ErrorKind::Unknown(b']'))
        );
    }
}