1#![deny(unsafe_code)]
3use alloc::boxed::Box;
4use alloc::string::{String, ToString};
5use alloc::vec::Vec;
6use core::fmt;
7
8use crate::helper::{ContainerKind, EncodingType, Helper, NestingLevel, Primitive};
9use crate::{Encoding, EncodingBox};
10
/// Returns `true` when `name` is acceptable as a struct/union name.
///
/// Accepted names are either exactly `"?"`, or a non-empty run of ASCII
/// letters, ASCII digits and underscores. Everything else — including the
/// empty string and any non-ASCII text — is rejected.
///
/// This is a `const fn`, so the scan is written as an index loop rather than
/// with iterators.
pub(crate) const fn verify_name(name: &str) -> bool {
    match name.as_bytes() {
        // A lone question mark is the one non-identifier name that is allowed.
        [b'?'] => true,
        [] => false,
        raw => {
            let mut idx = 0;
            while idx < raw.len() {
                if !matches!(raw[idx], b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_') {
                    return false;
                }
                idx += 1;
            }
            true
        }
    }
}
33
/// Error produced when an encoding string could not be parsed.
///
/// Besides the reason for the failure it carries an owned copy of the input
/// and the byte offset the parser had reached, both of which are included in
/// the `Display` output.
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ParseError {
    /// What went wrong.
    kind: ErrorKind,
    /// Owned copy of the complete input that was being parsed.
    data: String,
    /// Byte offset into `data` that the parser had reached.
    split_point: usize,
}
41
42impl ParseError {
43 pub(crate) fn new(parser: Parser<'_>, kind: ErrorKind) -> Self {
44 Self {
45 kind,
46 data: parser.data.to_string(),
47 split_point: parser.split_point,
48 }
49 }
50}
51
52impl fmt::Display for ParseError {
53 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
54 write!(
55 f,
56 "failed parsing encoding: {} at byte-index {} in {:?}",
57 self.kind, self.split_point, self.data,
58 )
59 }
60}
61
// Only compiled when the `std` feature is enabled. The body is empty, so all
// of the trait's methods use their default implementations.
#[cfg(feature = "std")]
impl std::error::Error for ParseError {}
64
/// The specific reason a parse step failed.
#[derive(Debug, PartialEq, Eq, Hash)]
pub(crate) enum ErrorKind {
    /// A byte was required but the input had ended.
    UnexpectedEnd,
    /// The byte does not start any recognised encoding, or is a closing
    /// delimiter in a place where none was expected.
    Unknown(u8),
    /// The byte following the complex marker `j` was not `f`, `d` or `D`.
    UnknownAfterComplex(u8),
    /// An ASCII digit was required but another byte was found.
    ExpectedInteger,
    /// The digits that were read do not fit in the target integer type.
    IntegerTooLarge,
    /// An array was not terminated by `]`.
    WrongEndArray,
    /// The input ended before the container of this kind was closed.
    WrongEndContainer(ContainerKind),
    /// The name of a container of this kind failed `verify_name`.
    InvalidIdentifier(ContainerKind),
    /// Input was left over when the parser was expected to be exhausted.
    NotAllConsumed,
}
77
78impl fmt::Display for ErrorKind {
79 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
80 match self {
81 Self::UnexpectedEnd => write!(f, "unexpected end"),
82 Self::Unknown(b) => {
83 write!(f, "unknown encoding character {}", *b as char)
84 }
85 Self::UnknownAfterComplex(b) => {
86 write!(f, "unknown encoding character {} after complex", *b as char,)
87 }
88 Self::ExpectedInteger => write!(f, "expected integer"),
89 Self::IntegerTooLarge => write!(f, "integer too large"),
90 Self::WrongEndArray => write!(f, "expected array to be closed"),
91 Self::WrongEndContainer(kind) => {
92 write!(f, "expected {kind} to be closed")
93 }
94 Self::InvalidIdentifier(kind) => {
95 write!(f, "got invalid identifier in {kind}")
96 }
97 Self::NotAllConsumed => {
98 write!(f, "remaining contents after parsing")
99 }
100 }
101 }
102}
103
/// Module-local `Result` alias whose error type defaults to [`ErrorKind`].
type Result<T, E = ErrorKind> = core::result::Result<T, E>;
105
/// Outcome of a single `Parser::parse_inner` step.
enum ParseInner {
    /// There was no input left to read.
    Empty,
    /// One complete encoding was parsed.
    Encoding(EncodingBox),
    /// A container terminator was read: `}` for a struct, `)` for a union.
    ContainerEnd(ContainerKind),
    /// An array terminator `]` was read.
    ArrayEnd,
}
112
/// A byte-wise cursor over an encoding string.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub(crate) struct Parser<'a> {
    /// The complete input; it is never modified, only the cursor moves.
    data: &'a str,
    /// Byte offset into `data` of the next unread byte.
    split_point: usize,
}
119
120impl<'a> Parser<'a> {
121 pub(crate) fn new(data: &'a str) -> Self {
122 Self {
123 split_point: 0,
124 data,
125 }
126 }
127
128 pub(crate) fn remaining(&self) -> &'a str {
129 &self.data[self.split_point..]
130 }
131
132 fn peek(&self) -> Result<u8> {
133 self.try_peek().ok_or(ErrorKind::UnexpectedEnd)
134 }
135
136 fn try_peek(&self) -> Option<u8> {
137 self.data.as_bytes().get(self.split_point).copied()
138 }
139
140 fn try_peek2(&self) -> Option<(u8, u8)> {
141 let bytes = self.data.as_bytes();
142 Some((
143 *bytes.get(self.split_point)?,
144 *bytes.get(self.split_point + 1)?,
145 ))
146 }
147
148 fn advance(&mut self) {
149 self.split_point += 1;
150 }
151
152 fn rollback(&mut self) {
153 self.split_point -= 1;
154 }
155
156 fn consume_while(&mut self, mut condition: impl FnMut(u8) -> bool) {
157 while let Some(b) = self.try_peek() {
158 if condition(b) {
159 self.advance();
160 } else {
161 break;
162 }
163 }
164 }
165
166 pub(crate) fn is_empty(&self) -> bool {
167 self.try_peek().is_none()
168 }
169
170 pub(crate) fn expect_empty(&self) -> Result<()> {
171 if self.is_empty() {
172 Ok(())
173 } else {
174 Err(ErrorKind::NotAllConsumed)
175 }
176 }
177}
178
179impl Parser<'_> {
180 pub(crate) fn strip_leading_qualifiers(&mut self) {
182 #[allow(clippy::byte_char_slices)]
184 const QUALIFIERS: &[u8] = &[
185 b'r', b'n', b'N', b'o', b'O', b'R', b'V', ];
193 self.consume_while(|b| QUALIFIERS.contains(&b));
196 }
197
198 fn chomp_digits(&mut self) -> Result<&str> {
202 let old_split_point = self.split_point;
203
204 if !self.peek()?.is_ascii_digit() {
206 return Err(ErrorKind::ExpectedInteger);
207 }
208
209 self.consume_while(|b| b.is_ascii_digit());
211
212 Ok(&self.data[old_split_point..self.split_point])
213 }
214
215 fn parse_u64(&mut self) -> Result<u64> {
216 self.chomp_digits()?
217 .parse()
218 .map_err(|_| ErrorKind::IntegerTooLarge)
219 }
220
221 fn parse_u8(&mut self) -> Result<u8> {
222 self.chomp_digits()?
223 .parse()
224 .map_err(|_| ErrorKind::IntegerTooLarge)
225 }
226}
227
228impl Parser<'_> {
232 fn expect_byte(&mut self, byte: u8) -> Option<()> {
233 if self.try_peek()? == byte {
234 self.advance();
235 Some(())
236 } else {
237 None
238 }
239 }
240
241 fn expect_one_of_str<'a>(&mut self, strings: impl IntoIterator<Item = &'a str>) -> Option<()> {
242 for s in strings {
243 if self.remaining().starts_with(s) {
244 for b in s.as_bytes() {
245 self.expect_byte(*b).unwrap();
246 }
247 return Some(());
248 }
249 }
250 None
251 }
252
253 fn expect_u64(&mut self, int: u64) -> Option<()> {
254 if self.parse_u64().ok()? == int {
255 Some(())
256 } else {
257 None
258 }
259 }
260
261 fn expect_u8(&mut self, int: u8) -> Option<()> {
262 if self.parse_u8().ok()? == int {
263 Some(())
264 } else {
265 None
266 }
267 }
268
269 pub(crate) fn expect_encoding(&mut self, enc: &Encoding, level: NestingLevel) -> Option<()> {
270 match enc.helper() {
271 Helper::Primitive(primitive) => {
272 self.expect_one_of_str(primitive.equivalents().iter().map(|p| p.to_str()))?;
273
274 if primitive == Primitive::Object && self.try_peek() == Some(b'"') {
275 self.advance();
276 self.consume_while(|b| b != b'"');
277 self.expect_byte(b'"')?;
278 }
279 Some(())
280 }
281 Helper::BitField(size, Some((offset, t))) => {
282 self.expect_byte(b'b')?;
283 self.expect_u64(*offset)?;
284 self.expect_encoding(t, level.bitfield())?;
285 self.expect_u8(size)
286 }
287 Helper::BitField(size, None) => {
288 self.expect_byte(b'b')?;
289 self.expect_u8(size)
290 }
291 Helper::Indirection(kind, t) => {
292 self.expect_byte(kind.prefix_byte())?;
293 self.expect_encoding(t, level.indirection(kind))
294 }
295 Helper::Array(len, item) => {
296 self.expect_byte(b'[')?;
297 self.expect_u64(len)?;
298 self.expect_encoding(item, level.array())?;
299 self.expect_byte(b']')
300 }
301 Helper::Container(kind, name, items) => {
302 self.expect_byte(kind.start_byte())?;
303 self.expect_one_of_str([name])?;
304 if let Some(level) = level.container_include_fields() {
305 self.expect_byte(b'=')?;
306 if items.is_empty() {
308 loop {
309 match self.parse_inner().ok()? {
310 ParseInner::Empty => {
311 return None;
313 }
314 ParseInner::Encoding(_) => {}
315 ParseInner::ContainerEnd(parsed_kind) => {
316 if parsed_kind == kind {
317 return Some(());
318 } else {
319 return None;
320 }
321 }
322 ParseInner::ArrayEnd => {
323 return None;
324 }
325 }
326 }
327 }
328 if self.try_peek() == Some(kind.end_byte()) {
330 self.advance();
331 return Some(());
332 }
333 for item in items {
334 self.expect_encoding(item, level)?;
335 }
336 }
337 self.expect_byte(kind.end_byte())
338 }
339 Helper::NoneInvalid => Some(()),
340 }
341 }
342}
343
344impl Parser<'_> {
345 fn parse_container(&mut self, kind: ContainerKind) -> Result<(&str, Vec<EncodingBox>)> {
346 let old_split_point = self.split_point;
347
348 let has_items = loop {
350 let b = self.try_peek().ok_or(ErrorKind::WrongEndContainer(kind))?;
351 if b == b'=' {
352 break true;
353 } else if b == kind.end_byte() {
354 break false;
355 }
356 self.advance();
357 };
358
359 let s = &self.data[old_split_point..self.split_point];
360
361 if !verify_name(s) {
362 return Err(ErrorKind::InvalidIdentifier(kind));
363 }
364
365 if has_items {
366 self.advance();
367 }
368
369 let mut items = Vec::new();
370 loop {
372 match self.parse_inner()? {
373 ParseInner::Empty => {
374 return Err(ErrorKind::WrongEndContainer(kind));
375 }
376 ParseInner::Encoding(enc) => {
377 items.push(enc);
378 }
379 ParseInner::ContainerEnd(parsed_kind) => {
380 if parsed_kind == kind {
381 return Ok((s, items));
382 } else {
383 return Err(ErrorKind::Unknown(parsed_kind.end_byte()));
384 }
385 }
386 ParseInner::ArrayEnd => {
387 return Err(ErrorKind::Unknown(b']'));
388 }
389 }
390 }
391 }
392
393 pub(crate) fn parse_encoding_or_none(&mut self) -> Result<EncodingBox> {
394 match self.parse_inner()? {
395 ParseInner::Empty => Ok(EncodingBox::None),
396 ParseInner::Encoding(enc) => Ok(enc),
397 ParseInner::ContainerEnd(kind) => Err(ErrorKind::Unknown(kind.end_byte())),
398 ParseInner::ArrayEnd => Err(ErrorKind::Unknown(b']')),
399 }
400 }
401
402 fn parse_inner(&mut self) -> Result<ParseInner> {
403 if self.is_empty() {
404 return Ok(ParseInner::Empty);
405 }
406 let b = self.peek()?;
407 self.advance();
408
409 Ok(ParseInner::Encoding(match b {
410 b'c' => EncodingBox::Char,
411 b's' => EncodingBox::Short,
412 b'i' => EncodingBox::Int,
413 b'l' => EncodingBox::Long,
414 b'q' => EncodingBox::LongLong,
415 b'C' => EncodingBox::UChar,
416 b'S' => EncodingBox::UShort,
417 b'I' => EncodingBox::UInt,
418 b'L' => EncodingBox::ULong,
419 b'Q' => EncodingBox::ULongLong,
420 b'f' => EncodingBox::Float,
421 b'd' => EncodingBox::Double,
422 b'D' => EncodingBox::LongDouble,
423 b'j' => {
424 let res = match self.peek()? {
425 b'f' => EncodingBox::FloatComplex,
426 b'd' => EncodingBox::DoubleComplex,
427 b'D' => EncodingBox::LongDoubleComplex,
428 b => return Err(ErrorKind::UnknownAfterComplex(b)),
429 };
430 self.advance();
431 res
432 }
433 b'B' => EncodingBox::Bool,
434 b'v' => EncodingBox::Void,
435 b'*' => EncodingBox::String,
436 b'@' => match self.try_peek() {
437 Some(b'?') => {
439 self.advance();
440 EncodingBox::Block
441 }
442 Some(b'"') => {
444 self.advance();
445 self.consume_while(|b| b != b'"');
446 self.expect_byte(b'"').ok_or(ErrorKind::UnexpectedEnd)?;
447 EncodingBox::Object
448 }
449 _ => EncodingBox::Object,
450 },
451 b'#' => EncodingBox::Class,
452 b':' => EncodingBox::Sel,
453 b'?' => EncodingBox::Unknown,
454
455 b'b' => {
456 let size_or_offset = self.parse_u64()?;
457 if let Some((size, ty)) = self.try_parse_bitfield_gnustep()? {
458 let offset = size_or_offset;
459 EncodingBox::BitField(size, Some(Box::new((offset, ty))))
460 } else {
461 let size = size_or_offset
462 .try_into()
463 .map_err(|_| ErrorKind::IntegerTooLarge)?;
464 EncodingBox::BitField(size, None)
465 }
466 }
467 b'^' => EncodingBox::Pointer(Box::new(match self.parse_inner()? {
468 ParseInner::Empty => EncodingBox::None,
469 ParseInner::Encoding(enc) => enc,
470 ParseInner::ContainerEnd(_) | ParseInner::ArrayEnd => {
471 self.rollback();
472 EncodingBox::None
473 }
474 })),
475 b'A' => EncodingBox::Atomic(Box::new(match self.parse_inner()? {
476 ParseInner::Empty => EncodingBox::None,
477 ParseInner::Encoding(enc) => enc,
478 ParseInner::ContainerEnd(_) | ParseInner::ArrayEnd => {
479 self.rollback();
480 EncodingBox::None
481 }
482 })),
483 b'[' => {
484 let len = self.parse_u64()?;
485 match self.parse_inner()? {
486 ParseInner::Empty => {
487 return Err(ErrorKind::WrongEndArray);
488 }
489 ParseInner::Encoding(item) => {
490 self.expect_byte(b']').ok_or(ErrorKind::WrongEndArray)?;
491 EncodingBox::Array(len, Box::new(item))
492 }
493 ParseInner::ArrayEnd => EncodingBox::Array(len, Box::new(EncodingBox::None)),
494 ParseInner::ContainerEnd(kind) => {
495 return Err(ErrorKind::Unknown(kind.end_byte()))
496 }
497 }
498 }
499 b']' => {
500 return Ok(ParseInner::ArrayEnd);
501 }
502 b'{' => {
503 let kind = ContainerKind::Struct;
504 let (name, items) = self.parse_container(kind)?;
505 EncodingBox::Struct(name.to_string(), items)
506 }
507 b'}' => {
508 return Ok(ParseInner::ContainerEnd(ContainerKind::Struct));
509 }
510 b'(' => {
511 let kind = ContainerKind::Union;
512 let (name, items) = self.parse_container(kind)?;
513 EncodingBox::Union(name.to_string(), items)
514 }
515 b')' => {
516 return Ok(ParseInner::ContainerEnd(ContainerKind::Union));
517 }
518 b => return Err(ErrorKind::Unknown(b)),
519 }))
520 }
521
522 fn try_parse_bitfield_gnustep(&mut self) -> Result<Option<(u8, EncodingBox)>> {
523 if let Some((b1, b2)) = self.try_peek2() {
524 let ty = match b1 {
528 b'c' => EncodingBox::Char,
529 b's' => EncodingBox::Short,
530 b'i' => EncodingBox::Int,
531 b'l' => EncodingBox::Long,
532 b'q' => EncodingBox::LongLong,
533 b'C' => EncodingBox::UChar,
534 b'S' => EncodingBox::UShort,
535 b'I' => EncodingBox::UInt,
536 b'L' => EncodingBox::ULong,
537 b'Q' => EncodingBox::ULongLong,
538 b'B' => EncodingBox::Bool,
539 _ => return Ok(None),
540 };
541 if !b2.is_ascii_digit() {
544 return Ok(None);
545 }
546 self.advance();
548 let size = self.parse_u8()?;
550 Ok(Some((size, ty)))
551 } else {
552 Ok(None)
553 }
554 }
555}
556
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec;

    // Container bodies are parsed as if the opening `{` had already been read.
    #[test]
    fn parse_container() {
        const KIND: ContainerKind = ContainerKind::Struct;

        let cases: [(&str, Result<(&str, Vec<EncodingBox>)>); 10] = [
            ("abc=}", Ok(("abc", vec![]))),
            (
                "abc=ii}",
                Ok(("abc", vec![EncodingBox::Int, EncodingBox::Int])),
            ),
            // Parsing stops at the closing brace; trailing input is ignored.
            ("_=}.a'", Ok(("_", vec![]))),
            ("abc}def", Ok(("abc", vec![]))),
            ("=def}", Err(ErrorKind::InvalidIdentifier(KIND))),
            (".=def}", Err(ErrorKind::InvalidIdentifier(KIND))),
            ("}xyz", Err(ErrorKind::InvalidIdentifier(KIND))),
            ("", Err(ErrorKind::WrongEndContainer(KIND))),
            ("abc", Err(ErrorKind::WrongEndContainer(KIND))),
            ("abc)def", Err(ErrorKind::WrongEndContainer(KIND))),
        ];

        for (input, expected) in cases {
            let mut parser = Parser::new(input);
            assert_eq!(parser.parse_container(KIND), expected, "input: {input:?}");
        }
    }

    #[test]
    fn parse_bitfield() {
        // Parses one encoding and additionally requires all input to be used.
        fn parse_fully(enc: &str) -> Result<EncodingBox> {
            let mut parser = Parser::new(enc);
            let parsed = parser.parse_encoding_or_none()?;
            parser.expect_empty()?;
            Ok(parsed)
        }

        assert_eq!(parse_fully("b8"), Ok(EncodingBox::BitField(8, None)));
        // A type letter without a size is not part of the bit-field.
        assert_eq!(parse_fully("b8C"), Err(ErrorKind::NotAllConsumed));
        assert_eq!(
            parse_fully("b8C4"),
            Ok(EncodingBox::BitField(
                4,
                Some(Box::new((8, EncodingBox::UChar))),
            )),
        );

        assert_eq!(
            parse_fully("{s=b8C}"),
            Ok(EncodingBox::Struct(
                "s".into(),
                vec![EncodingBox::BitField(8, None), EncodingBox::UChar],
            )),
        );

        // A plain size must fit in a `u8`, whereas an offset may be larger.
        assert_eq!(parse_fully("b2000"), Err(ErrorKind::IntegerTooLarge));
        assert_eq!(
            parse_fully("b2000c100"),
            Ok(EncodingBox::BitField(
                100,
                Some(Box::new((2000, EncodingBox::Char))),
            )),
        );
        assert_eq!(parse_fully("b2000C257"), Err(ErrorKind::IntegerTooLarge));
    }

    #[test]
    fn parse_closing() {
        let outcome = Parser::new("]").parse_encoding_or_none();
        assert_eq!(outcome, Err(ErrorKind::Unknown(b']')));
    }
}