1use std::char;
6use std::io;
7use std::marker::PhantomData;
8use std::str;
9
10use serde::de;
11
12use super::error::{Error, ErrorCode, Result};
13use super::util::StringReader;
14use super::util::{Number, ParseNumber};
15
/// Parsing mode consulted by `Deserializer::parse_value` to decide how the
/// next token is interpreted.
enum State {
    /// Parse an ordinary value; this is the mode installed by
    /// `Deserializer::new` and restored after the other two modes are used.
    Normal,
    /// Parse the input as the body of a map without enclosing braces; this is
    /// the mode installed by `Deserializer::new_for_root`.
    Root,
    /// Parse the next token as an unquoted key name; set by the map accessor
    /// when the upcoming key does not start with `"`.
    Keyname,
}
21
22pub struct Deserializer<Iter: Iterator<Item = u8>> {
24 rdr: StringReader<Iter>,
25 str_buf: Vec<u8>,
26 state: State,
27}
28
29impl<Iter> Deserializer<Iter>
30where
31 Iter: Iterator<Item = u8>,
32{
33 #[inline]
35 pub fn new(rdr: Iter) -> Deserializer<Iter> {
36 Deserializer {
37 rdr: StringReader::new(rdr),
38 str_buf: Vec::with_capacity(128),
39 state: State::Normal,
40 }
41 }
42
43 #[inline]
45 pub fn new_for_root(rdr: Iter) -> Deserializer<Iter> {
46 let mut res = Deserializer::new(rdr);
47 res.state = State::Root;
48 res
49 }
50
51 #[inline]
55 pub fn end(&mut self) -> Result<()> {
56 self.rdr.parse_whitespace()?;
57 if self.rdr.eof()? {
58 Ok(())
59 } else {
60 Err(self.rdr.error(ErrorCode::TrailingCharacters))
61 }
62 }
63
64 fn is_punctuator_char(&self, ch: u8) -> bool {
65 matches!(ch, b'{' | b'}' | b'[' | b']' | b',' | b':')
66 }
67
68 fn parse_keyname<'de, V>(&mut self, visitor: V) -> Result<V::Value>
69 where
70 V: de::Visitor<'de>,
71 {
72 self.str_buf.clear();
77
78 let mut space: Option<usize> = None;
79 loop {
80 let ch = self.rdr.next_char_or_null()?;
81
82 if ch == b':' {
83 if self.str_buf.is_empty() {
84 return Err(self.rdr.error(ErrorCode::Custom(
85 "Found ':' but no key name (for an empty key name use quotes)".to_string(),
86 )));
87 } else if space.is_some()
88 && space.expect("Internal error: json parsing") != self.str_buf.len()
89 {
90 return Err(self.rdr.error(ErrorCode::Custom(
91 "Found whitespace in your key name (use quotes to include)".to_string(),
92 )));
93 }
94 self.rdr.uneat_char(ch);
95 let s = str::from_utf8(&self.str_buf).expect("Internal error: json parsing");
96 return visitor.visit_str(s);
97 } else if ch <= b' ' {
98 if ch == 0 {
99 return Err(self.rdr.error(ErrorCode::EofWhileParsingObject));
100 } else if space.is_none() {
101 space = Some(self.str_buf.len());
102 }
103 } else if self.is_punctuator_char(ch) {
104 return Err(self.rdr.error(ErrorCode::Custom("Found a punctuator where a key name was expected (check your syntax or use quotes if the key name includes {}[],: or whitespace)".to_string())));
105 } else {
106 self.str_buf.push(ch);
107 }
108 }
109 }
110
111 fn parse_value<'de, V>(&mut self, visitor: V) -> Result<V::Value>
112 where
113 V: de::Visitor<'de>,
114 {
115 self.rdr.parse_whitespace()?;
116
117 if self.rdr.eof()? {
118 return Err(self.rdr.error(ErrorCode::EofWhileParsingValue));
119 }
120
121 match self.state {
122 State::Keyname => {
123 self.state = State::Normal;
124 return self.parse_keyname(visitor);
125 }
126 State::Root => {
127 self.state = State::Normal;
128 return self.visit_map(true, visitor);
129 }
130 _ => {}
131 }
132
133 match self.rdr.peek_or_null()? {
134 b'"' => {
144 self.rdr.eat_char();
145 self.parse_string()?;
146 let s = str::from_utf8(&self.str_buf).expect("Internal error: json parsing");
147 visitor.visit_str(s)
148 }
149 b'[' => {
150 self.rdr.eat_char();
151 let ret = visitor.visit_seq(SeqVisitor::new(self))?;
152 self.rdr.parse_whitespace()?;
153 match self.rdr.next_char()? {
154 Some(b']') => Ok(ret),
155 Some(_) => Err(self.rdr.error(ErrorCode::TrailingCharacters)),
156 None => Err(self.rdr.error(ErrorCode::EofWhileParsingList)),
157 }
158 }
159 b'{' => {
160 self.rdr.eat_char();
161 self.visit_map(false, visitor)
162 }
163 b'\x00' => Err(self.rdr.error(ErrorCode::ExpectedSomeValue)),
164 _ => self.parse_tfnns(visitor),
165 }
166 }
167
168 fn visit_map<'de, V>(&mut self, root: bool, visitor: V) -> Result<V::Value>
169 where
170 V: de::Visitor<'de>,
171 {
172 let ret = visitor.visit_map(MapVisitor::new(self, root))?;
173 self.rdr.parse_whitespace()?;
174 match self.rdr.next_char()? {
175 Some(b'}') => {
176 if !root {
177 Ok(ret)
178 } else {
179 Err(self.rdr.error(ErrorCode::TrailingCharacters))
180 } }
182 Some(_) => Err(self.rdr.error(ErrorCode::TrailingCharacters)),
183 None => {
184 if root {
185 Ok(ret)
186 } else {
187 Err(self.rdr.error(ErrorCode::EofWhileParsingObject))
188 }
189 }
190 }
191 }
192
193 fn parse_ident(&mut self, ident: &[u8]) -> Result<()> {
194 for c in ident {
195 if Some(*c) != self.rdr.next_char()? {
196 return Err(self.rdr.error(ErrorCode::ExpectedSomeIdent));
197 }
198 }
199
200 Ok(())
201 }
202
203 fn parse_tfnns<'de, V>(&mut self, visitor: V) -> Result<V::Value>
204 where
205 V: de::Visitor<'de>,
206 {
207 self.str_buf.clear();
210
211 let first = self.rdr.peek()?.expect("Internal error: json parsing");
212
213 if self.is_punctuator_char(first) {
214 return Err(self.rdr.error(ErrorCode::PunctuatorInQlString));
215 }
216
217 loop {
218 let ch = self.rdr.next_char_or_null()?;
219
220 let is_eol = ch == b'\r' || ch == b'\n' || ch == b'\x00';
221 let is_comment = ch == b'#'
222 || if ch == b'/' {
223 let next = self.rdr.peek_or_null()?;
224 next == b'/' || next == b'*'
225 } else {
226 false
227 };
228 if is_eol || is_comment || ch == b',' || ch == b'}' || ch == b']' {
229 let chf = self.str_buf[0];
230 match chf {
231 b'f' => {
232 if str::from_utf8(&self.str_buf)
233 .expect("Internal error: json parsing")
234 .trim()
235 == "false"
236 {
237 self.rdr.uneat_char(ch);
238 return visitor.visit_bool(false);
239 }
240 }
241 b'n' => {
242 if str::from_utf8(&self.str_buf)
243 .expect("Internal error: json parsing")
244 .trim()
245 == "null"
246 {
247 self.rdr.uneat_char(ch);
248 return visitor.visit_unit();
249 }
250 }
251 b't' => {
252 if str::from_utf8(&self.str_buf)
253 .expect("Internal error: json parsing")
254 .trim()
255 == "true"
256 {
257 self.rdr.uneat_char(ch);
258 return visitor.visit_bool(true);
259 }
260 }
261 _ => {
262 if chf == b'-' || chf.is_ascii_digit() {
263 let mut parser = ParseNumber::new(self.str_buf.iter().copied());
264 match parser.parse(false) {
265 Ok(Number::F64(v)) => {
266 self.rdr.uneat_char(ch);
267 return visitor.visit_f64(v);
268 }
269 Ok(Number::U64(v)) => {
270 self.rdr.uneat_char(ch);
271 return visitor.visit_u64(v);
272 }
273 Ok(Number::I64(v)) => {
274 self.rdr.uneat_char(ch);
275 return visitor.visit_i64(v);
276 }
277 Err(_) => {} }
279 }
280 }
281 }
282 if is_eol {
283 return visitor.visit_str(
285 str::from_utf8(&self.str_buf)
286 .expect("Internal error: json parsing")
287 .trim(),
288 );
289 }
290 }
291 self.str_buf.push(ch);
292
293 if self.str_buf == b"'''" {
294 return self.parse_ml_string(visitor);
295 }
296 }
297 }
298
299 fn decode_hex_escape(&mut self) -> Result<u16> {
300 let mut i = 0;
301 let mut n = 0u16;
302 while i < 4 && !self.rdr.eof()? {
303 n = match self.rdr.next_char_or_null()? {
304 c @ b'0'..=b'9' => n * 16_u16 + ((c as u16) - (b'0' as u16)),
305 b'a' | b'A' => n * 16_u16 + 10_u16,
306 b'b' | b'B' => n * 16_u16 + 11_u16,
307 b'c' | b'C' => n * 16_u16 + 12_u16,
308 b'd' | b'D' => n * 16_u16 + 13_u16,
309 b'e' | b'E' => n * 16_u16 + 14_u16,
310 b'f' | b'F' => n * 16_u16 + 15_u16,
311 _ => {
312 return Err(self.rdr.error(ErrorCode::InvalidEscape));
313 }
314 };
315
316 i += 1;
317 }
318
319 if i != 4 {
321 return Err(self.rdr.error(ErrorCode::InvalidEscape));
322 }
323
324 Ok(n)
325 }
326
327 fn ml_skip_white(&mut self) -> Result<bool> {
328 match self.rdr.peek_or_null()? {
329 b' ' | b'\t' | b'\r' => {
330 self.rdr.eat_char();
331 Ok(true)
332 }
333 _ => Ok(false),
334 }
335 }
336
337 fn ml_skip_indent(&mut self, indent: usize) -> Result<()> {
338 let mut skip = indent;
339 while self.ml_skip_white()? && skip > 0 {
340 skip -= 1;
341 }
342 Ok(())
343 }
344
345 fn parse_ml_string<'de, V>(&mut self, visitor: V) -> Result<V::Value>
346 where
347 V: de::Visitor<'de>,
348 {
349 self.str_buf.clear();
350
351 let mut triple = 0;
353
354 let (_, col) = self.rdr.pos();
356 let indent = col - 4;
357
358 while self.ml_skip_white()? {}
360 if self.rdr.peek_or_null()? == b'\n' {
361 self.rdr.eat_char();
362 self.ml_skip_indent(indent)?;
363 }
364
365 loop {
367 if self.rdr.eof()? {
368 return Err(self.rdr.error(ErrorCode::EofWhileParsingString));
369 } let ch = self.rdr.next_char_or_null()?;
371
372 if ch == b'\'' {
373 triple += 1;
374 if triple == 3 {
375 if self.str_buf.last() == Some(&b'\n') {
376 self.str_buf.pop();
377 }
378 let res = str::from_utf8(&self.str_buf).expect("Internal error: json parsing");
379 return visitor.visit_str(res);
381 } else {
382 continue;
383 }
384 }
385
386 while triple > 0 {
387 self.str_buf.push(b'\'');
388 triple -= 1;
389 }
390
391 if ch != b'\r' {
392 self.str_buf.push(ch);
393 }
394 if ch == b'\n' {
395 self.ml_skip_indent(indent)?;
396 }
397 }
398 }
399
400 fn parse_string(&mut self) -> Result<()> {
401 self.str_buf.clear();
402
403 loop {
404 let ch = match self.rdr.next_char()? {
405 Some(ch) => ch,
406 None => {
407 return Err(self.rdr.error(ErrorCode::EofWhileParsingString));
408 }
409 };
410
411 match ch {
412 b'"' => {
413 return Ok(());
414 }
415 b'\\' => {
416 let ch = match self.rdr.next_char()? {
417 Some(ch) => ch,
418 None => {
419 return Err(self.rdr.error(ErrorCode::EofWhileParsingString));
420 }
421 };
422
423 match ch {
424 b'"' => self.str_buf.push(b'"'),
425 b'\\' => self.str_buf.push(b'\\'),
426 b'/' => self.str_buf.push(b'/'),
427 b'b' => self.str_buf.push(b'\x08'),
428 b'f' => self.str_buf.push(b'\x0c'),
429 b'n' => self.str_buf.push(b'\n'),
430 b'r' => self.str_buf.push(b'\r'),
431 b't' => self.str_buf.push(b'\t'),
432 b'u' => {
433 let c = match self.decode_hex_escape()? {
434 0xDC00..=0xDFFF => {
435 return Err(self
436 .rdr
437 .error(ErrorCode::LoneLeadingSurrogateInHexEscape));
438 }
439
440 n1 @ 0xD800..=0xDBFF => {
443 match (self.rdr.next_char()?, self.rdr.next_char()?) {
444 (Some(b'\\'), Some(b'u')) => (),
445 _ => {
446 return Err(self
447 .rdr
448 .error(ErrorCode::UnexpectedEndOfHexEscape));
449 }
450 }
451
452 let n2 = self.decode_hex_escape()?;
453
454 if !(0xDC00..=0xDFFF).contains(&n2) {
455 return Err(self
456 .rdr
457 .error(ErrorCode::LoneLeadingSurrogateInHexEscape));
458 }
459
460 let n = ((((n1 - 0xD800) as u32) << 10) | (n2 - 0xDC00) as u32)
461 + 0x1_0000;
462
463 match char::from_u32(n) {
464 Some(c) => c,
465 None => {
466 return Err(self
467 .rdr
468 .error(ErrorCode::InvalidUnicodeCodePoint));
469 }
470 }
471 }
472
473 n => match char::from_u32(n as u32) {
474 Some(c) => c,
475 None => {
476 return Err(self
477 .rdr
478 .error(ErrorCode::InvalidUnicodeCodePoint));
479 }
480 },
481 };
482
483 self.str_buf.extend(c.encode_utf8(&mut [0; 4]).as_bytes());
484 }
485 _ => {
486 return Err(self.rdr.error(ErrorCode::InvalidEscape));
487 }
488 }
489 }
490 ch => {
491 self.str_buf.push(ch);
492 }
493 }
494 }
495 }
496
497 fn parse_object_colon(&mut self) -> Result<()> {
498 self.rdr.parse_whitespace()?;
499
500 match self.rdr.next_char()? {
501 Some(b':') => Ok(()),
502 Some(_) => Err(self.rdr.error(ErrorCode::ExpectedColon)),
503 None => Err(self.rdr.error(ErrorCode::EofWhileParsingObject)),
504 }
505 }
506}
507
508impl<'de, Iter> de::Deserializer<'de> for &mut Deserializer<Iter>
509where
510 Iter: Iterator<Item = u8>,
511{
512 type Error = Error;
513
514 #[inline]
515 fn deserialize_any<V>(self, visitor: V) -> Result<V::Value>
516 where
517 V: de::Visitor<'de>,
518 {
519 if let State::Root = self.state {}
520
521 self.parse_value(visitor)
522 }
523
524 #[inline]
526 fn deserialize_option<V>(self, visitor: V) -> Result<V::Value>
527 where
528 V: de::Visitor<'de>,
529 {
530 self.rdr.parse_whitespace()?;
531
532 match self.rdr.peek_or_null()? {
533 b'n' => {
534 self.rdr.eat_char();
535 self.parse_ident(b"ull")?;
536 visitor.visit_none()
537 }
538 _ => visitor.visit_some(self),
539 }
540 }
541
542 #[inline]
544 fn deserialize_newtype_struct<V>(self, _name: &str, visitor: V) -> Result<V::Value>
545 where
546 V: de::Visitor<'de>,
547 {
548 visitor.visit_newtype_struct(self)
549 }
550
551 serde::forward_to_deserialize_any! {
552 bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
553 bytes byte_buf unit unit_struct seq tuple map
554 tuple_struct struct enum identifier ignored_any
555 }
556}
557
558struct SeqVisitor<'a, Iter: 'a + Iterator<Item = u8>> {
559 de: &'a mut Deserializer<Iter>,
560}
561
562impl<'a, Iter: Iterator<Item = u8>> SeqVisitor<'a, Iter> {
563 fn new(de: &'a mut Deserializer<Iter>) -> Self {
564 SeqVisitor { de }
565 }
566}
567
568impl<'de, Iter> de::SeqAccess<'de> for SeqVisitor<'_, Iter>
569where
570 Iter: Iterator<Item = u8>,
571{
572 type Error = Error;
573
574 fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>>
575 where
576 T: de::DeserializeSeed<'de>,
577 {
578 self.de.rdr.parse_whitespace()?;
579
580 match self.de.rdr.peek()? {
581 Some(b']') => {
582 return Ok(None);
583 }
584 Some(_) => {}
585 None => {
586 return Err(self.de.rdr.error(ErrorCode::EofWhileParsingList));
587 }
588 }
589
590 let value = seed.deserialize(&mut *self.de)?;
591
592 self.de.rdr.parse_whitespace()?;
594 if self.de.rdr.peek()? == Some(b',') {
595 self.de.rdr.eat_char();
596 self.de.rdr.parse_whitespace()?;
597 }
598
599 Ok(Some(value))
600 }
601}
602
603struct MapVisitor<'a, Iter: 'a + Iterator<Item = u8>> {
604 de: &'a mut Deserializer<Iter>,
605 first: bool,
606 root: bool,
607}
608
609impl<'a, Iter: Iterator<Item = u8>> MapVisitor<'a, Iter> {
610 fn new(de: &'a mut Deserializer<Iter>, root: bool) -> Self {
611 MapVisitor {
612 de,
613 first: true,
614 root,
615 }
616 }
617}
618
619impl<'de, Iter> de::MapAccess<'de> for MapVisitor<'_, Iter>
620where
621 Iter: Iterator<Item = u8>,
622{
623 type Error = Error;
624
625 fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
626 where
627 K: de::DeserializeSeed<'de>,
628 {
629 self.de.rdr.parse_whitespace()?;
630
631 if self.first {
632 self.first = false;
633 } else if self.de.rdr.peek()? == Some(b',') {
634 self.de.rdr.eat_char();
636 self.de.rdr.parse_whitespace()?;
637 }
638
639 match self.de.rdr.peek()? {
640 Some(b'}') => return Ok(None), Some(_) => {}
642 None => {
643 if self.root {
644 return Ok(None);
645 } else {
646 return Err(self.de.rdr.error(ErrorCode::EofWhileParsingObject));
647 }
648 }
649 }
650
651 match self.de.rdr.peek()? {
652 Some(ch) => {
653 self.de.state = if ch == b'"' {
654 State::Normal
655 } else {
656 State::Keyname
657 };
658 Ok(Some(seed.deserialize(&mut *self.de)?))
659 }
660 None => Err(self.de.rdr.error(ErrorCode::EofWhileParsingValue)),
661 }
662 }
663
664 fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value>
665 where
666 V: de::DeserializeSeed<'de>,
667 {
668 self.de.parse_object_colon()?;
669
670 seed.deserialize(&mut *self.de)
671 }
672}
673
674impl<'de, Iter> de::VariantAccess<'de> for &mut Deserializer<Iter>
675where
676 Iter: Iterator<Item = u8>,
677{
678 type Error = Error;
679
680 fn unit_variant(self) -> Result<()> {
681 de::Deserialize::deserialize(self)
682 }
683
684 fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value>
685 where
686 T: de::DeserializeSeed<'de>,
687 {
688 seed.deserialize(self)
689 }
690
691 fn tuple_variant<V>(self, _len: usize, visitor: V) -> Result<V::Value>
692 where
693 V: de::Visitor<'de>,
694 {
695 de::Deserializer::deserialize_any(self, visitor)
696 }
697
698 fn struct_variant<V>(self, _fields: &'static [&'static str], visitor: V) -> Result<V::Value>
699 where
700 V: de::Visitor<'de>,
701 {
702 de::Deserializer::deserialize_any(self, visitor)
703 }
704}
705
706pub struct StreamDeserializer<T, Iter>
710where
711 Iter: Iterator<Item = u8>,
712 T: de::DeserializeOwned,
713{
714 deser: Deserializer<Iter>,
715 _marker: PhantomData<T>,
716}
717
718impl<T, Iter> StreamDeserializer<T, Iter>
719where
720 Iter: Iterator<Item = u8>,
721 T: de::DeserializeOwned,
722{
723 pub fn new(iter: Iter) -> StreamDeserializer<T, Iter> {
726 StreamDeserializer {
727 deser: Deserializer::new(iter),
728 _marker: PhantomData,
729 }
730 }
731}
732
733impl<T, Iter> Iterator for StreamDeserializer<T, Iter>
734where
735 Iter: Iterator<Item = u8>,
736 T: de::DeserializeOwned,
737{
738 type Item = Result<T>;
739
740 fn next(&mut self) -> Option<Result<T>> {
741 if let Err(e) = self.deser.rdr.parse_whitespace() {
745 return Some(Err(e));
746 };
747
748 match self.deser.rdr.eof() {
749 Ok(true) => None,
750 Ok(false) => match de::Deserialize::deserialize(&mut self.deser) {
751 Ok(v) => Some(Ok(v)),
752 Err(e) => Some(Err(e)),
753 },
754 Err(e) => Some(Err(e)),
755 }
756 }
757}
758
759pub fn from_iter<I, T>(iter: I) -> Result<T>
764where
765 I: Iterator<Item = io::Result<u8>>,
766 T: de::DeserializeOwned,
767{
768 let fold: io::Result<Vec<_>> = iter.collect();
769
770 if let Err(e) = fold {
771 return Err(Error::Io(e));
772 }
773
774 let bytes = fold.expect("Internal error: json parsing");
775
776 let mut de = Deserializer::new_for_root(bytes.iter().copied());
782 de::Deserialize::deserialize(&mut de)
783 .and_then(|x| de.end().map(|()| x))
784 .or_else(|_| {
785 let mut de2 = Deserializer::new(bytes.iter().copied());
786 de::Deserialize::deserialize(&mut de2).and_then(|x| de2.end().map(|()| x))
787 })
788
789 }
800
801pub fn from_reader<R, T>(rdr: R) -> Result<T>
803where
804 R: io::Read,
805 T: de::DeserializeOwned,
806{
807 from_iter(rdr.bytes())
808}
809
810pub fn from_slice<T>(v: &[u8]) -> Result<T>
812where
813 T: de::DeserializeOwned,
814{
815 from_iter(v.iter().map(|&byte| Ok(byte)))
816}
817
818pub fn from_str<T>(s: &str) -> Result<T>
820where
821 T: de::DeserializeOwned,
822{
823 from_slice(s.as_bytes())
824}