quick_xml/de/
mod.rs

1//! Serde `Deserializer` module.
2//!
3//! Due to the complexity of the XML standard and the fact that Serde was developed
4//! with JSON in mind, not all Serde concepts apply smoothly to XML. This leads to
5//! the fact that some XML concepts are inexpressible in terms of Serde derives
6//! and may require manual deserialization.
7//!
8//! The most notable restriction is the ability to distinguish between _elements_
9//! and _attributes_, as no other format used by serde has such a concept.
10//!
11//! Because of that, the mapping is performed on a best-effort basis.
12//!
13//!
14//!
15//! Table of Contents
16//! =================
17//! - [Mapping XML to Rust types](#mapping-xml-to-rust-types)
18//!   - [Basics](#basics)
19//!   - [Optional attributes and elements](#optional-attributes-and-elements)
20//!   - [Choices (`xs:choice` XML Schema type)](#choices-xschoice-xml-schema-type)
21//!   - [Sequences (`xs:all` and `xs:sequence` XML Schema types)](#sequences-xsall-and-xssequence-xml-schema-types)
22//! - [Generate Rust types from XML](#generate-rust-types-from-xml)
23//! - [Composition Rules](#composition-rules)
24//! - [Enum Representations](#enum-representations)
25//!   - [Normal enum variant](#normal-enum-variant)
26//!   - [`$text` enum variant](#text-enum-variant)
27//! - [`$text` and `$value` special names](#text-and-value-special-names)
28//!   - [`$text`](#text)
29//!   - [`$value`](#value)
30//!     - [Primitives and sequences of primitives](#primitives-and-sequences-of-primitives)
31//!     - [Structs and sequences of structs](#structs-and-sequences-of-structs)
32//!     - [Enums and sequences of enums](#enums-and-sequences-of-enums)
33//! - [Frequently Used Patterns](#frequently-used-patterns)
34//!   - [`<element>` lists](#element-lists)
35//!   - [Overlapped (Out-of-Order) Elements](#overlapped-out-of-order-elements)
36//!   - [Internally Tagged Enums](#internally-tagged-enums)
37//!
38//!
39//!
40//! Mapping XML to Rust types
41//! =========================
42//!
43//! Type names are never considered when deserializing, so you can name your
44//! types as you wish. Other general rules:
45//! - `struct` field name could be represented in XML only as an attribute name
46//!   or an element name;
47//! - `enum` variant name could be represented in XML only as an attribute name
48//!   or an element name;
49//! - the unit struct, unit type `()` and unit enum variant can be deserialized
50//!   from any valid XML content:
51//!   - attribute and element names;
52//!   - attribute and element values;
53//!   - text or CDATA content (including mixed text and CDATA content).
54//!
55//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
56//!
57//! NOTE: All tests are marked with an `ignore` option, even though they do
58//! compile. This is because rustdoc marks such blocks with an information
59//! icon unlike `no_run` blocks.
60//!
61//! </div>
62//!
63//! <table>
64//! <thead>
65//! <tr><th colspan="2">
66//!
67//! ## Basics
68//!
69//! </th></tr>
70//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
71//! </thead>
72//! <tbody style="vertical-align:top;">
73//! <tr>
74//! <td>
75//! Content of attributes and text / CDATA content of elements (including mixed
76//! text and CDATA content):
77//!
78//! ```xml
79//! <... ...="content" />
80//! ```
81//! ```xml
82//! <...>content</...>
83//! ```
84//! ```xml
85//! <...><![CDATA[content]]></...>
86//! ```
87//! ```xml
88//! <...>text<![CDATA[cdata]]>text</...>
89//! ```
90//! Mixed text / CDATA content represents one logical string, `"textcdatatext"` in that case.
91//! </td>
92//! <td>
93//!
94//! You can use any type that can be deserialized from an `&str`, for example:
95//! - [`String`] and [`&str`]
96//! - [`Cow<str>`]
97//! - [`u32`], [`f32`] and other numeric types
98//! - `enum`s, like
99//!   ```
100//!   # use pretty_assertions::assert_eq;
101//!   # use serde::Deserialize;
102//!   # #[derive(Debug, PartialEq)]
103//!   #[derive(Deserialize)]
104//!   enum Language {
105//!     Rust,
106//!     Cpp,
107//!     #[serde(other)]
108//!     Other,
109//!   }
110//!   # #[derive(Debug, PartialEq, Deserialize)]
111//!   # struct X { #[serde(rename = "$text")] x: Language }
112//!   # assert_eq!(X { x: Language::Rust  }, quick_xml::de::from_str("<x>Rust</x>").unwrap());
113//!   # assert_eq!(X { x: Language::Cpp   }, quick_xml::de::from_str("<x>C<![CDATA[p]]>p</x>").unwrap());
114//!   # assert_eq!(X { x: Language::Other }, quick_xml::de::from_str("<x><![CDATA[other]]></x>").unwrap());
115//!   ```
116//!
117//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
118//!
119//! NOTE: deserialization to non-owned types (i.e. types that borrow from the
120//! input), such as `&str`, is possible only if you parse a document in the UTF-8
121//! encoding, the content does not contain entity references such as `&amp;`
122//! or character references such as `&#xD;`, and the text content is represented
123//! by a single [text] or [CDATA] element.
124//! </div>
125//! <!-- TODO: document an error type returned -->
126//!
127//! [text]: Event::Text
128//! [CDATA]: Event::CData
129//! </td>
130//! </tr>
131//! <!-- 2 ===================================================================================== -->
132//! <tr>
133//! <td>
134//!
135//! Content of attributes and text / CDATA content of elements (including mixed
136//! text and CDATA content), which represents a space-delimited list, as
137//! specified in the XML Schema specification for [`xs:list`] `simpleType`:
138//!
139//! ```xml
140//! <... ...="element1 element2 ..." />
141//! ```
142//! ```xml
143//! <...>
144//!   element1
145//!   element2
146//!   ...
147//! </...>
148//! ```
149//! ```xml
150//! <...><![CDATA[
151//!   element1
152//!   element2
153//!   ...
154//! ]]></...>
155//! ```
156//!
157//! [`xs:list`]: https://www.w3.org/TR/xmlschema11-2/#list-datatypes
158//! </td>
159//! <td>
160//!
161//! Use any type that is deserialized using a [`deserialize_seq()`] call, for example:
162//!
163//! ```
164//! type List = Vec<u32>;
165//! ```
166//!
167//! See the next row to learn where in your struct definition you should
168//! use that type.
169//!
170//! According to the XML Schema specification, the delimiter between elements is
171//! one or more whitespace (`' '`, `'\r'`, `'\n'`, and `'\t'`) characters.
172//!
173//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
174//!
175//! NOTE: according to the XML Schema restrictions, you cannot escape those
176//! white-space characters, so list elements will _never_ contain them.
177//! In practice you will usually use `xs:list`s for lists of numbers or enumerated
178//! values which look like identifiers in many languages, for example, `item`,
179//! `some_item` or `some-item`, so that shouldn't be a problem.
180//!
181//! NOTE: according to the XML Schema specification, list elements can be
182//! delimited only by spaces. Other delimiters (for example, commas) are not
183//! allowed.
184//!
185//! </div>
186//!
187//! [`deserialize_seq()`]: de::Deserializer::deserialize_seq
188//! </td>
189//! </tr>
190//! <!-- 3 ===================================================================================== -->
191//! <tr>
192//! <td>
193//! A typical XML with attributes. The root tag name does not matter:
194//!
195//! ```xml
196//! <any-tag one="..." two="..."/>
197//! ```
198//! </td>
199//! <td>
200//!
201//! A structure where each XML attribute is mapped to a field with a name
202//! starting with `@`. Because Rust identifiers do not permit the `@` character,
203//! you should use the `#[serde(rename = "@...")]` attribute to rename it.
204//! The name of the struct itself does not matter:
205//!
206//! ```
207//! # use serde::Deserialize;
208//! # type T = ();
209//! # type U = ();
210//! // Get both attributes
211//! # #[derive(Debug, PartialEq)]
212//! #[derive(Deserialize)]
213//! struct AnyName {
214//!   #[serde(rename = "@one")]
215//!   one: T,
216//!
217//!   #[serde(rename = "@two")]
218//!   two: U,
219//! }
220//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
221//! ```
222//! ```
223//! # use serde::Deserialize;
224//! # type T = ();
225//! // Get only the one attribute, ignore the other
226//! # #[derive(Debug, PartialEq)]
227//! #[derive(Deserialize)]
228//! struct AnyName {
229//!   #[serde(rename = "@one")]
230//!   one: T,
231//! }
232//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
233//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."/>"#).unwrap();
234//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
235//! ```
236//! ```
237//! # use serde::Deserialize;
238//! // Ignore all attributes
239//! // You can also use the `()` type (unit type)
240//! # #[derive(Debug, PartialEq)]
241//! #[derive(Deserialize)]
242//! struct AnyName;
243//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
244//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
245//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
246//! ```
247//!
248//! All these structs can be used to deserialize from an XML on the
249//! left side depending on amount of information that you want to get.
250//! Of course, you can combine them with elements extractor structs (see below).
251//!
252//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
253//!
254//! NOTE: XML allows you to have an attribute and an element with the same name
255//! inside the one element. quick-xml deals with that by prepending a `@` prefix
256//! to the name of attributes.
257//! </div>
258//! </td>
259//! </tr>
260//! <!-- 4 ===================================================================================== -->
261//! <tr>
262//! <td>
263//! A typical XML with child elements. The root tag name does not matter:
264//!
265//! ```xml
266//! <any-tag>
267//!   <one>...</one>
268//!   <two>...</two>
269//! </any-tag>
270//! ```
271//! </td>
272//! <td>
273//! A structure where each XML child element is mapped to the field.
274//! Each element name becomes a name of field. The name of the struct itself
275//! does not matter:
276//!
277//! ```
278//! # use serde::Deserialize;
279//! # type T = ();
280//! # type U = ();
281//! // Get both elements
282//! # #[derive(Debug, PartialEq)]
283//! #[derive(Deserialize)]
284//! struct AnyName {
285//!   one: T,
286//!   two: U,
287//! }
288//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
289//! #
290//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap_err();
291//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"#).unwrap_err();
292//! ```
293//! ```
294//! # use serde::Deserialize;
295//! # type T = ();
296//! // Get only the one element, ignore the other
297//! # #[derive(Debug, PartialEq)]
298//! #[derive(Deserialize)]
299//! struct AnyName {
300//!   one: T,
301//! }
302//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
303//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
304//! ```
305//! ```
306//! # use serde::Deserialize;
307//! // Ignore all elements
308//! // You can also use the `()` type (unit type)
309//! # #[derive(Debug, PartialEq)]
310//! #[derive(Deserialize)]
311//! struct AnyName;
312//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
313//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
314//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"#).unwrap();
315//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
316//! ```
317//!
318//! All these structs can be used to deserialize from an XML on the
319//! left side depending on amount of information that you want to get.
320//! Of course, you can combine them with attributes extractor structs (see above).
321//!
322//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
323//!
324//! NOTE: XML allows you to have an attribute and an element with the same name
325//! inside the one element. quick-xml deals with that by prepending a `@` prefix
326//! to the name of attributes.
327//! </div>
328//! </td>
329//! </tr>
330//! <!-- 5 ===================================================================================== -->
331//! <tr>
332//! <td>
333//! An XML with an attribute and a child element named equally:
334//!
335//! ```xml
336//! <any-tag field="...">
337//!   <field>...</field>
338//! </any-tag>
339//! ```
340//! </td>
341//! <td>
342//!
343//! You MUST specify `#[serde(rename = "@field")]` on a field that will be used
344//! for an attribute:
345//!
346//! ```
347//! # use pretty_assertions::assert_eq;
348//! # use serde::Deserialize;
349//! # type T = ();
350//! # type U = ();
351//! # #[derive(Debug, PartialEq)]
352//! #[derive(Deserialize)]
353//! struct AnyName {
354//!   #[serde(rename = "@field")]
355//!   attribute: T,
356//!   field: U,
357//! }
358//! # assert_eq!(
359//! #   AnyName { attribute: (), field: () },
360//! #   quick_xml::de::from_str(r#"
361//! #     <any-tag field="...">
362//! #       <field>...</field>
363//! #     </any-tag>
364//! #   "#).unwrap(),
365//! # );
366//! ```
367//! </td>
368//! </tr>
369//! <!-- ======================================================================================= -->
370//! <tr><th colspan="2">
371//!
372//! ## Optional attributes and elements
373//!
374//! </th></tr>
375//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
376//! <!-- 6 ===================================================================================== -->
377//! <tr>
378//! <td>
379//! An optional XML attribute that you want to capture.
380//! The root tag name does not matter:
381//!
382//! ```xml
383//! <any-tag optional="..."/>
384//! ```
385//! ```xml
386//! <any-tag/>
387//! ```
388//! </td>
389//! <td>
390//!
391//! A structure with an optional field, renamed according to the requirements
392//! for attributes:
393//!
394//! ```
395//! # use pretty_assertions::assert_eq;
396//! # use serde::Deserialize;
397//! # type T = ();
398//! # #[derive(Debug, PartialEq)]
399//! #[derive(Deserialize)]
400//! struct AnyName {
401//!   #[serde(rename = "@optional")]
402//!   optional: Option<T>,
403//! }
404//! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag optional="..."/>"#).unwrap());
405//! # assert_eq!(AnyName { optional: None     }, quick_xml::de::from_str(r#"<any-tag/>"#).unwrap());
406//! ```
407//! When the XML attribute is present, type `T` will be deserialized from
408//! an attribute value (which is a string). Note, that if `T = String` or other
409//! string type, the empty attribute is mapped to a `Some("")`, whereas `None`
410//! represents the missed attribute:
411//! ```xml
412//! <any-tag optional="..."/><!-- Some("...") -->
413//! <any-tag optional=""/>   <!-- Some("") -->
414//! <any-tag/>               <!-- None -->
415//! ```
416//! </td>
417//! </tr>
418//! <!-- 7 ===================================================================================== -->
419//! <tr>
420//! <td>
421//! An optional XML element that you want to capture.
422//! The root tag name does not matter:
423//!
424//! ```xml
425//! <any-tag>
426//!   <optional>...</optional>
427//! </any-tag>
428//! ```
429//! ```xml
430//! <any-tag>
431//!   <optional/>
432//! </any-tag>
433//! ```
434//! ```xml
435//! <any-tag/>
436//! ```
437//! </td>
438//! <td>
439//!
440//! A structure with an optional field:
441//!
442//! ```
443//! # use pretty_assertions::assert_eq;
444//! # use serde::Deserialize;
445//! # type T = ();
446//! # #[derive(Debug, PartialEq)]
447//! #[derive(Deserialize)]
448//! struct AnyName {
449//!   optional: Option<T>,
450//! }
451//! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag><optional>...</optional></any-tag>"#).unwrap());
452//! # assert_eq!(AnyName { optional: None     }, quick_xml::de::from_str(r#"<any-tag/>"#).unwrap());
453//! ```
454//! When the XML element is present, type `T` will be deserialized from an
455//! element (which is a string or a multi-mapping -- i.e. mapping which can have
456//! duplicated keys).
457//! <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
458//!
459//! Currently some edge cases exist, which are described in the issue [#497].
460//! </div>
461//! </td>
462//! </tr>
463//! <!-- ======================================================================================= -->
464//! <tr><th colspan="2">
465//!
466//! ## Choices (`xs:choice` XML Schema type)
467//!
468//! </th></tr>
469//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
470//! <!-- 8 ===================================================================================== -->
471//! <tr>
472//! <td>
473//! An XML with different root tag names, as well as text / CDATA content:
474//!
475//! ```xml
476//! <one field1="...">...</one>
477//! ```
478//! ```xml
479//! <two>
480//!   <field2>...</field2>
481//! </two>
482//! ```
483//! ```xml
484//! Text <![CDATA[or (mixed)
485//! CDATA]]> content
486//! ```
487//! </td>
488//! <td>
489//!
490//! An enum where each variant has the name of a possible root tag. The name of
491//! the enum itself does not matter.
492//!
493//! If you need to get the textual content, mark a variant with `#[serde(rename = "$text")]`.
494//!
495//! All these structs can be used to deserialize from any XML on the
496//! left side depending on amount of information that you want to get:
497//!
498//! ```
499//! # use pretty_assertions::assert_eq;
500//! # use serde::Deserialize;
501//! # type T = ();
502//! # type U = ();
503//! # #[derive(Debug, PartialEq)]
504//! #[derive(Deserialize)]
505//! #[serde(rename_all = "snake_case")]
506//! enum AnyName {
507//!   One { #[serde(rename = "@field1")] field1: T },
508//!   Two { field2: U },
509//!
510//!   /// Use a unit variant if you do not care about the content.
511//!   /// You can use a tuple variant if you want to parse
512//!   /// textual content as an xs:list.
513//!   /// Struct variants will pass a string to the
514//!   /// struct enum variant visitor, which typically
515//!   /// returns Err(Custom)
516//!   #[serde(rename = "$text")]
517//!   Text(String),
518//! }
519//! # assert_eq!(AnyName::One { field1: () }, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
520//! # assert_eq!(AnyName::Two { field2: () }, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
521//! # assert_eq!(AnyName::Text("text  cdata ".into()), quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
522//! ```
523//! ```
524//! # use pretty_assertions::assert_eq;
525//! # use serde::Deserialize;
526//! # type T = ();
527//! # #[derive(Debug, PartialEq)]
528//! #[derive(Deserialize)]
529//! struct Two {
530//!   field2: T,
531//! }
532//! # #[derive(Debug, PartialEq)]
533//! #[derive(Deserialize)]
534//! #[serde(rename_all = "snake_case")]
535//! enum AnyName {
536//!   // `field1` content discarded
537//!   One,
538//!   Two(Two),
539//!   #[serde(rename = "$text")]
540//!   Text,
541//! }
542//! # assert_eq!(AnyName::One,                     quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
543//! # assert_eq!(AnyName::Two(Two { field2: () }), quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
544//! # assert_eq!(AnyName::Text,                    quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
545//! ```
546//! ```
547//! # use pretty_assertions::assert_eq;
548//! # use serde::Deserialize;
549//! # #[derive(Debug, PartialEq)]
550//! #[derive(Deserialize)]
551//! #[serde(rename_all = "snake_case")]
552//! enum AnyName {
553//!   One,
554//!   // the <two> and textual content will be mapped to this
555//!   #[serde(other)]
556//!   Other,
557//! }
558//! # assert_eq!(AnyName::One,   quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
559//! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
560//! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
561//! ```
562//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
563//!
564//! NOTE: You should have variants for all possible tag names in your enum
565//! or have an `#[serde(other)]` variant.
566//! <!-- TODO: document an error type if that requirement is violated -->
567//! </div>
568//! </td>
569//! </tr>
570//! <!-- 9 ===================================================================================== -->
571//! <tr>
572//! <td>
573//!
574//! `<xs:choice>` embedded in another element, and at the same time you want
575//! to get access to other attributes that can appear in the same container
576//! (`<any-tag>`). This case can also be described as choosing a
577//! Rust enum variant based on a tag name:
578//!
579//! ```xml
580//! <any-tag field="...">
581//!   <one>...</one>
582//! </any-tag>
583//! ```
584//! ```xml
585//! <any-tag field="...">
586//!   <two>...</two>
587//! </any-tag>
588//! ```
589//! ```xml
590//! <any-tag field="...">
591//!   Text <![CDATA[or (mixed)
592//!   CDATA]]> content
593//! </any-tag>
594//! ```
595//! </td>
596//! <td>
597//!
598//! A structure with a field whose type is an `enum`.
599//!
600//! If you need to get the textual content, mark a variant with `#[serde(rename = "$text")]`.
601//!
602//! Names of the enum, struct, and struct field with `Choice` type do not matter:
603//!
604//! ```
605//! # use pretty_assertions::assert_eq;
606//! # use serde::Deserialize;
607//! # type T = ();
608//! # #[derive(Debug, PartialEq)]
609//! #[derive(Deserialize)]
610//! #[serde(rename_all = "snake_case")]
611//! enum Choice {
612//!   One,
613//!   Two,
614//!
615//!   /// Use a unit variant if you do not care about the content.
616//!   /// You can use a tuple variant if you want to parse
617//!   /// textual content as an xs:list.
618//!   /// Struct variants will pass a string to the
619//!   /// struct enum variant visitor, which typically
620//!   /// returns Err(Custom)
621//!   #[serde(rename = "$text")]
622//!   Text(String),
623//! }
624//! # #[derive(Debug, PartialEq)]
625//! #[derive(Deserialize)]
626//! struct AnyName {
627//!   #[serde(rename = "@field")]
628//!   field: T,
629//!
630//!   #[serde(rename = "$value")]
631//!   any_name: Choice,
632//! }
633//! # assert_eq!(
634//! #   AnyName { field: (), any_name: Choice::One },
635//! #   quick_xml::de::from_str(r#"<any-tag field="..."><one>...</one></any-tag>"#).unwrap(),
636//! # );
637//! # assert_eq!(
638//! #   AnyName { field: (), any_name: Choice::Two },
639//! #   quick_xml::de::from_str(r#"<any-tag field="..."><two>...</two></any-tag>"#).unwrap(),
640//! # );
641//! # assert_eq!(
642//! #   AnyName { field: (), any_name: Choice::Text("text  cdata ".into()) },
643//! #   quick_xml::de::from_str(r#"<any-tag field="...">text <![CDATA[ cdata ]]></any-tag>"#).unwrap(),
644//! # );
645//! ```
646//! </td>
647//! </tr>
648//! <!-- 10 ==================================================================================== -->
649//! <tr>
650//! <td>
651//!
652//! `<xs:choice>` embedded in another element, and at the same time you want
653//! to get access to other elements that can appear in the same container
654//! (`<any-tag>`). This case can also be described as choosing a
655//! Rust enum variant based on a tag name:
656//!
657//! ```xml
658//! <any-tag>
659//!   <field>...</field>
660//!   <one>...</one>
661//! </any-tag>
662//! ```
663//! ```xml
664//! <any-tag>
665//!   <two>...</two>
666//!   <field>...</field>
667//! </any-tag>
668//! ```
669//! </td>
670//! <td>
671//!
672//! A structure with a field whose type is an `enum`.
673//!
674//! Names of the enum, struct, and struct field with `Choice` type do not matter:
675//!
676//! ```
677//! # use pretty_assertions::assert_eq;
678//! # use serde::Deserialize;
679//! # type T = ();
680//! # #[derive(Debug, PartialEq)]
681//! #[derive(Deserialize)]
682//! #[serde(rename_all = "snake_case")]
683//! enum Choice {
684//!   One,
685//!   Two,
686//! }
687//! # #[derive(Debug, PartialEq)]
688//! #[derive(Deserialize)]
689//! struct AnyName {
690//!   field: T,
691//!
692//!   #[serde(rename = "$value")]
693//!   any_name: Choice,
694//! }
695//! # assert_eq!(
696//! #   AnyName { field: (), any_name: Choice::One },
697//! #   quick_xml::de::from_str(r#"<any-tag><field>...</field><one>...</one></any-tag>"#).unwrap(),
698//! # );
699//! # assert_eq!(
700//! #   AnyName { field: (), any_name: Choice::Two },
701//! #   quick_xml::de::from_str(r#"<any-tag><two>...</two><field>...</field></any-tag>"#).unwrap(),
702//! # );
703//! ```
704//!
705//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
706//!
707//! NOTE: if your `Choice` enum would contain an `#[serde(other)]`
708//! variant, element `<field>` will be mapped to the `field` and not to the enum
709//! variant.
710//! </div>
711//!
712//! </td>
713//! </tr>
714//! <!-- 11 ==================================================================================== -->
715//! <tr>
716//! <td>
717//!
718//! `<xs:choice>` encapsulated in another element with a fixed name:
719//!
720//! ```xml
721//! <any-tag field="...">
722//!   <choice>
723//!     <one>...</one>
724//!   </choice>
725//! </any-tag>
726//! ```
727//! ```xml
728//! <any-tag field="...">
729//!   <choice>
730//!     <two>...</two>
731//!   </choice>
732//! </any-tag>
733//! ```
734//! </td>
735//! <td>
736//!
737//! A structure with a field of an intermediate type with one field of `enum` type.
738//! Actually, this example is not necessary, because you can construct it by yourself
739//! using the composition rules that were described above. However the XML construction
740//! described here is very common, so it is shown explicitly.
741//!
742//! Names of the enum and struct do not matter:
743//!
744//! ```
745//! # use pretty_assertions::assert_eq;
746//! # use serde::Deserialize;
747//! # type T = ();
748//! # #[derive(Debug, PartialEq)]
749//! #[derive(Deserialize)]
750//! #[serde(rename_all = "snake_case")]
751//! enum Choice {
752//!   One,
753//!   Two,
754//! }
755//! # #[derive(Debug, PartialEq)]
756//! #[derive(Deserialize)]
757//! struct Holder {
758//!   #[serde(rename = "$value")]
759//!   any_name: Choice,
760//! }
761//! # #[derive(Debug, PartialEq)]
762//! #[derive(Deserialize)]
763//! struct AnyName {
764//!   #[serde(rename = "@field")]
765//!   field: T,
766//!
767//!   choice: Holder,
768//! }
769//! # assert_eq!(
770//! #   AnyName { field: (), choice: Holder { any_name: Choice::One } },
771//! #   quick_xml::de::from_str(r#"<any-tag field="..."><choice><one>...</one></choice></any-tag>"#).unwrap(),
772//! # );
773//! # assert_eq!(
774//! #   AnyName { field: (), choice: Holder { any_name: Choice::Two } },
775//! #   quick_xml::de::from_str(r#"<any-tag field="..."><choice><two>...</two></choice></any-tag>"#).unwrap(),
776//! # );
777//! ```
778//! </td>
779//! </tr>
780//! <!-- 12 ==================================================================================== -->
781//! <tr>
782//! <td>
783//!
784//! `<xs:choice>` encapsulated in another element with a fixed name:
785//!
786//! ```xml
787//! <any-tag>
788//!   <field>...</field>
789//!   <choice>
790//!     <one>...</one>
791//!   </choice>
792//! </any-tag>
793//! ```
794//! ```xml
795//! <any-tag>
796//!   <choice>
797//!     <two>...</two>
798//!   </choice>
799//!   <field>...</field>
800//! </any-tag>
801//! ```
802//! </td>
803//! <td>
804//!
805//! A structure with a field of an intermediate type with one field of `enum` type.
806//! Actually, this example is not necessary, because you can construct it by yourself
807//! using the composition rules that were described above. However the XML construction
808//! described here is very common, so it is shown explicitly.
809//!
810//! Names of the enum and struct do not matter:
811//!
812//! ```
813//! # use pretty_assertions::assert_eq;
814//! # use serde::Deserialize;
815//! # type T = ();
816//! # #[derive(Debug, PartialEq)]
817//! #[derive(Deserialize)]
818//! #[serde(rename_all = "snake_case")]
819//! enum Choice {
820//!   One,
821//!   Two,
822//! }
823//! # #[derive(Debug, PartialEq)]
824//! #[derive(Deserialize)]
825//! struct Holder {
826//!   #[serde(rename = "$value")]
827//!   any_name: Choice,
828//! }
829//! # #[derive(Debug, PartialEq)]
830//! #[derive(Deserialize)]
831//! struct AnyName {
832//!   field: T,
833//!
834//!   choice: Holder,
835//! }
836//! # assert_eq!(
837//! #   AnyName { field: (), choice: Holder { any_name: Choice::One } },
838//! #   quick_xml::de::from_str(r#"<any-tag><field>...</field><choice><one>...</one></choice></any-tag>"#).unwrap(),
839//! # );
840//! # assert_eq!(
841//! #   AnyName { field: (), choice: Holder { any_name: Choice::Two } },
842//! #   quick_xml::de::from_str(r#"<any-tag><choice><two>...</two></choice><field>...</field></any-tag>"#).unwrap(),
843//! # );
844//! ```
845//! </td>
846//! </tr>
847//! <!-- ======================================================================================== -->
848//! <tr><th colspan="2">
849//!
850//! ## Sequences (`xs:all` and `xs:sequence` XML Schema types)
851//!
852//! </th></tr>
853//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
854//! <!-- 13 ==================================================================================== -->
855//! <tr>
856//! <td>
857//! A sequence inside of a tag without a dedicated name:
858//!
859//! ```xml
860//! <any-tag/>
861//! ```
862//! ```xml
863//! <any-tag>
864//!   <item/>
865//! </any-tag>
866//! ```
867//! ```xml
868//! <any-tag>
869//!   <item/>
870//!   <item/>
871//!   <item/>
872//! </any-tag>
873//! ```
874//! </td>
875//! <td>
876//!
877//! A structure with a field which is a sequence type, for example, [`Vec`].
878//! Because XML syntax does not distinguish between empty sequences and missed
879//! elements, we should indicate that on the Rust side, because serde will require
880//! that field `item` exists. You can do that in two possible ways:
881//!
882//! Use the `#[serde(default)]` attribute for a [field] or the entire [struct]:
883//! ```
884//! # use pretty_assertions::assert_eq;
885//! # use serde::Deserialize;
886//! # type Item = ();
887//! # #[derive(Debug, PartialEq)]
888//! #[derive(Deserialize)]
889//! struct AnyName {
890//!   #[serde(default)]
891//!   item: Vec<Item>,
892//! }
893//! # assert_eq!(
894//! #   AnyName { item: vec![] },
895//! #   quick_xml::de::from_str(r#"<any-tag/>"#).unwrap(),
896//! # );
897//! # assert_eq!(
898//! #   AnyName { item: vec![()] },
899//! #   quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"#).unwrap(),
900//! # );
901//! # assert_eq!(
902//! #   AnyName { item: vec![(), (), ()] },
903//! #   quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"#).unwrap(),
904//! # );
905//! ```
906//!
907//! Use the [`Option`]. In that case the inner array will always contain at least one
908//! element after deserialization:
909//! ```ignore
910//! # use pretty_assertions::assert_eq;
911//! # use serde::Deserialize;
912//! # type Item = ();
913//! # #[derive(Debug, PartialEq)]
914//! #[derive(Deserialize)]
915//! struct AnyName {
916//!   item: Option<Vec<Item>>,
917//! }
918//! # assert_eq!(
919//! #   AnyName { item: None },
920//! #   quick_xml::de::from_str(r#"<any-tag/>"#).unwrap(),
921//! # );
922//! # assert_eq!(
923//! #   AnyName { item: Some(vec![()]) },
924//! #   quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"#).unwrap(),
925//! # );
926//! # assert_eq!(
927//! #   AnyName { item: Some(vec![(), (), ()]) },
928//! #   quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"#).unwrap(),
929//! # );
930//! ```
931//!
932//! See also [Frequently Used Patterns](#element-lists).
933//!
934//! [field]: https://serde.rs/field-attrs.html#default
935//! [struct]: https://serde.rs/container-attrs.html#default
936//! </td>
937//! </tr>
938//! <!-- 14 ==================================================================================== -->
939//! <tr>
940//! <td>
941//! A sequence with a strict order, probably with mixed content
942//! (text / CDATA and tags):
943//!
944//! ```xml
945//! <one>...</one>
946//! text
947//! <![CDATA[cdata]]>
948//! <two>...</two>
949//! <one>...</one>
950//! ```
951//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
952//!
953//! NOTE: this is just an example for showing mapping. XML does not allow
954//! multiple root tags -- you should wrap the sequence into a tag.
955//! </div>
956//! </td>
957//! <td>
958//!
959//! All elements are mapped to a heterogeneous sequential type: a tuple or a named tuple.
960//! Each element of the tuple should be able to be deserialized from the nested
961//! element content (`...`), except the enum types which would be deserialized
962//! from the full element (`<one>...</one>`), so they could use the element name
963//! to choose the right variant:
964//!
965//! ```
966//! # use pretty_assertions::assert_eq;
967//! # use serde::Deserialize;
968//! # type One = ();
969//! # type Two = ();
970//! # /*
971//! type One = ...;
972//! type Two = ...;
973//! # */
974//! # #[derive(Debug, PartialEq)]
975//! #[derive(Deserialize)]
976//! struct AnyName(One, String, Two, One);
977//! # assert_eq!(
978//! #   AnyName((), "text cdata".into(), (), ()),
979//! #   quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
980//! # );
981//! ```
982//! ```
983//! # use pretty_assertions::assert_eq;
984//! # use serde::Deserialize;
985//! # #[derive(Debug, PartialEq)]
986//! #[derive(Deserialize)]
987//! #[serde(rename_all = "snake_case")]
988//! enum Choice {
989//!   One,
990//! }
991//! # type Two = ();
992//! # /*
993//! type Two = ...;
994//! # */
995//! type AnyName = (Choice, String, Two, Choice);
996//! # assert_eq!(
997//! #   (Choice::One, "text cdata".to_string(), (), Choice::One),
998//! #   quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
999//! # );
1000//! ```
1001//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1002//!
1003//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1004//! so you cannot have two adjacent string types in your sequence.
1005//!
1006//! NOTE: In case the list might contain tags that are overlapped with tags
1007//! that do not correspond to the list, you should enable the [`overlapped-lists`] feature.
1008//! </div>
1009//! </td>
1010//! </tr>
1011//! <!-- 15 ==================================================================================== -->
1012//! <tr>
1013//! <td>
1014//! A sequence with a non-strict order, probably with a mixed content
1015//! (text / CDATA and tags).
1016//!
1017//! ```xml
1018//! <one>...</one>
1019//! text
1020//! <![CDATA[cdata]]>
1021//! <two>...</two>
1022//! <one>...</one>
1023//! ```
1024//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1025//!
1026//! NOTE: this is just an example for showing mapping. XML does not allow
1027//! multiple root tags -- you should wrap the sequence into a tag.
1028//! </div>
1029//! </td>
1030//! <td>
1031//! A homogeneous sequence of elements with a fixed or dynamic size:
1032//!
1033//! ```
1034//! # use pretty_assertions::assert_eq;
1035//! # use serde::Deserialize;
1036//! # #[derive(Debug, PartialEq)]
1037//! #[derive(Deserialize)]
1038//! #[serde(rename_all = "snake_case")]
1039//! enum Choice {
1040//!   One,
1041//!   Two,
1042//!   #[serde(other)]
1043//!   Other,
1044//! }
1045//! type AnyName = [Choice; 4];
1046//! # assert_eq!(
1047//! #   [Choice::One, Choice::Other, Choice::Two, Choice::One],
1048//! #   quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
1049//! # );
1050//! ```
1051//! ```
1052//! # use pretty_assertions::assert_eq;
1053//! # use serde::Deserialize;
1054//! # #[derive(Debug, PartialEq)]
1055//! #[derive(Deserialize)]
1056//! #[serde(rename_all = "snake_case")]
1057//! enum Choice {
1058//!   One,
1059//!   Two,
1060//!   #[serde(rename = "$text")]
1061//!   Other(String),
1062//! }
1063//! type AnyName = Vec<Choice>;
1064//! # assert_eq!(
1065//! #   vec![
1066//! #     Choice::One,
1067//! #     Choice::Other("text cdata".into()),
1068//! #     Choice::Two,
1069//! #     Choice::One,
1070//! #   ],
1071//! #   quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
1072//! # );
1073//! ```
1074//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1075//!
1076//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1077//! so you cannot have two adjacent string types in your sequence.
1078//! </div>
1079//! </td>
1080//! </tr>
1081//! <!-- 16 ==================================================================================== -->
1082//! <tr>
1083//! <td>
1084//! A sequence with a strict order, probably with a mixed content,
1085//! (text and tags) inside of the other element:
1086//!
1087//! ```xml
1088//! <any-tag attribute="...">
1089//!   <one>...</one>
1090//!   text
1091//!   <![CDATA[cdata]]>
1092//!   <two>...</two>
1093//!   <one>...</one>
1094//! </any-tag>
1095//! ```
1096//! </td>
1097//! <td>
1098//!
1099//! A structure where all child elements are mapped to one field which has
1100//! a heterogeneous sequential type: a tuple or a named tuple. Each element of the
1101//! tuple should be able to be deserialized from the full element (`<one>...</one>`).
1102//!
1103//! You MUST specify `#[serde(rename = "$value")]` on that field:
1104//!
1105//! ```
1106//! # use pretty_assertions::assert_eq;
1107//! # use serde::Deserialize;
1108//! # type One = ();
1109//! # type Two = ();
1110//! # /*
1111//! type One = ...;
1112//! type Two = ...;
1113//! # */
1114//!
1115//! # #[derive(Debug, PartialEq)]
1116//! #[derive(Deserialize)]
1117//! struct AnyName {
1118//!   #[serde(rename = "@attribute")]
1119//! # attribute: (),
1120//! # /*
1121//!   attribute: ...,
1122//! # */
1123//!   // Not (yet?) supported by serde
1124//!   // https://github.com/serde-rs/serde/issues/1905
1125//!   // #[serde(flatten)]
1126//!   #[serde(rename = "$value")]
1127//!   any_name: (One, String, Two, One),
1128//! }
1129//! # assert_eq!(
1130//! #   AnyName { attribute: (), any_name: ((), "text cdata".into(), (), ()) },
1131//! #   quick_xml::de::from_str("\
1132//! #     <any-tag attribute='...'>\
1133//! #       <one>...</one>\
1134//! #       text \
1135//! #       <![CDATA[cdata]]>\
1136//! #       <two>...</two>\
1137//! #       <one>...</one>\
1138//! #     </any-tag>"
1139//! #   ).unwrap(),
1140//! # );
1141//! ```
1142//! ```
1143//! # use pretty_assertions::assert_eq;
1144//! # use serde::Deserialize;
1145//! # type One = ();
1146//! # type Two = ();
1147//! # /*
1148//! type One = ...;
1149//! type Two = ...;
1150//! # */
1151//!
1152//! # #[derive(Debug, PartialEq)]
1153//! #[derive(Deserialize)]
1154//! struct NamedTuple(One, String, Two, One);
1155//!
1156//! # #[derive(Debug, PartialEq)]
1157//! #[derive(Deserialize)]
1158//! struct AnyName {
1159//!   #[serde(rename = "@attribute")]
1160//! # attribute: (),
1161//! # /*
1162//!   attribute: ...,
1163//! # */
1164//!   // Not (yet?) supported by serde
1165//!   // https://github.com/serde-rs/serde/issues/1905
1166//!   // #[serde(flatten)]
1167//!   #[serde(rename = "$value")]
1168//!   any_name: NamedTuple,
1169//! }
1170//! # assert_eq!(
1171//! #   AnyName { attribute: (), any_name: NamedTuple((), "text cdata".into(), (), ()) },
1172//! #   quick_xml::de::from_str("\
1173//! #     <any-tag attribute='...'>\
1174//! #       <one>...</one>\
1175//! #       text \
1176//! #       <![CDATA[cdata]]>\
1177//! #       <two>...</two>\
1178//! #       <one>...</one>\
1179//! #     </any-tag>"
1180//! #   ).unwrap(),
1181//! # );
1182//! ```
1183//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1184//!
1185//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1186//! so you cannot have two adjacent string types in your sequence.
1187//! </div>
1188//! </td>
1189//! </tr>
1190//! <!-- 17 ==================================================================================== -->
1191//! <tr>
1192//! <td>
1193//! A sequence with a non-strict order, probably with a mixed content
1194//! (text / CDATA and tags) inside of the other element:
1195//!
1196//! ```xml
1197//! <any-tag>
1198//!   <one>...</one>
1199//!   text
1200//!   <![CDATA[cdata]]>
1201//!   <two>...</two>
1202//!   <one>...</one>
1203//! </any-tag>
1204//! ```
1205//! </td>
1206//! <td>
1207//!
1208//! A structure where all child elements are mapped to one field which has
1209//! a homogeneous sequential type: an array-like container. The container's element
1210//! type `T` should be able to be deserialized from the nested element content (`...`),
1211//! except if it is an enum type, which would be deserialized from the full
1212//! element (`<one>...</one>`).
1213//!
1214//! You MUST specify `#[serde(rename = "$value")]` on that field:
1215//!
1216//! ```
1217//! # use pretty_assertions::assert_eq;
1218//! # use serde::Deserialize;
1219//! # #[derive(Debug, PartialEq)]
1220//! #[derive(Deserialize)]
1221//! #[serde(rename_all = "snake_case")]
1222//! enum Choice {
1223//!   One,
1224//!   Two,
1225//!   #[serde(rename = "$text")]
1226//!   Other(String),
1227//! }
1228//! # #[derive(Debug, PartialEq)]
1229//! #[derive(Deserialize)]
1230//! struct AnyName {
1231//!   #[serde(rename = "@attribute")]
1232//! # attribute: (),
1233//! # /*
1234//!   attribute: ...,
1235//! # */
1236//!   // Not (yet?) supported by serde
1237//!   // https://github.com/serde-rs/serde/issues/1905
1238//!   // #[serde(flatten)]
1239//!   #[serde(rename = "$value")]
1240//!   any_name: [Choice; 4],
1241//! }
1242//! # assert_eq!(
1243//! #   AnyName { attribute: (), any_name: [
1244//! #     Choice::One,
1245//! #     Choice::Other("text cdata".into()),
1246//! #     Choice::Two,
1247//! #     Choice::One,
1248//! #   ] },
1249//! #   quick_xml::de::from_str("\
1250//! #     <any-tag attribute='...'>\
1251//! #       <one>...</one>\
1252//! #       text \
1253//! #       <![CDATA[cdata]]>\
1254//! #       <two>...</two>\
1255//! #       <one>...</one>\
1256//! #     </any-tag>"
1257//! #   ).unwrap(),
1258//! # );
1259//! ```
1260//! ```
1261//! # use pretty_assertions::assert_eq;
1262//! # use serde::Deserialize;
1263//! # #[derive(Debug, PartialEq)]
1264//! #[derive(Deserialize)]
1265//! #[serde(rename_all = "snake_case")]
1266//! enum Choice {
1267//!   One,
1268//!   Two,
1269//!   #[serde(rename = "$text")]
1270//!   Other(String),
1271//! }
1272//! # #[derive(Debug, PartialEq)]
1273//! #[derive(Deserialize)]
1274//! struct AnyName {
1275//!   #[serde(rename = "@attribute")]
1276//! # attribute: (),
1277//! # /*
1278//!   attribute: ...,
1279//! # */
1280//!   // Not (yet?) supported by serde
1281//!   // https://github.com/serde-rs/serde/issues/1905
1282//!   // #[serde(flatten)]
1283//!   #[serde(rename = "$value")]
1284//!   any_name: Vec<Choice>,
1285//! }
1286//! # assert_eq!(
1287//! #   AnyName { attribute: (), any_name: vec![
1288//! #     Choice::One,
1289//! #     Choice::Other("text cdata".into()),
1290//! #     Choice::Two,
1291//! #     Choice::One,
1292//! #   ] },
1293//! #   quick_xml::de::from_str("\
1294//! #     <any-tag attribute='...'>\
1295//! #       <one>...</one>\
1296//! #       text \
1297//! #       <![CDATA[cdata]]>\
1298//! #       <two>...</two>\
1299//! #       <one>...</one>\
1300//! #     </any-tag>"
1301//! #   ).unwrap(),
1302//! # );
1303//! ```
1304//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1305//!
1306//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1307//! so you cannot have two adjacent string types in your sequence.
1308//! </div>
1309//! </td>
1310//! </tr>
1311//! </tbody>
1312//! </table>
1313//!
1314//!
1315//! Generate Rust types from XML
1316//! ============================
1317//!
1318//! To speed up the creation of Rust types that represent a given XML file you can
1319//! use the [xml_schema_generator](https://github.com/Thomblin/xml_schema_generator).
1320//! It provides a standalone binary and a Rust library that parses one or more XML files
1321//! and generates a collection of structs that are compatible with quick_xml::de.
1322//!
1323//!
1324//!
1325//! Composition Rules
1326//! =================
1327//!
1328//! The XML format is very different from other formats supported by `serde`.
1329//! One such difference is how data in the serialized form is related to
1330//! the Rust type. Usually each byte in the data can be associated only with
1331//! one field in the data structure. However, XML is an exception.
1332//!
1333//! For example, take this XML:
1334//!
1335//! ```xml
1336//! <any>
1337//!   <key attr="value"/>
1338//! </any>
1339//! ```
1340//!
1341//! and try to deserialize it to the struct `AnyName`:
1342//!
1343//! ```no_run
1344//! # use serde::Deserialize;
1345//! #[derive(Deserialize)]
1346//! struct AnyName { // AnyName calls `deserialize_struct` on `<any><key attr="value"/></any>`
1347//!                  //                         Used data:          ^^^^^^^^^^^^^^^^^^^
1348//!   key: Inner,    // Inner   calls `deserialize_struct` on `<key attr="value"/>`
1349//!                  //                         Used data:          ^^^^^^^^^^^^
1350//! }
1351//! #[derive(Deserialize)]
1352//! struct Inner {
1353//!   #[serde(rename = "@attr")]
1354//!   attr: String,  // String  calls `deserialize_string` on `value`
1355//!                  //                         Used data:     ^^^^^
1356//! }
1357//! ```
1358//!
1359//! The comments show which methods of a [`Deserializer`] are called by each struct's
1360//! `deserialize` method and which input they see. **Used data** shows what
1361//! content is actually used for deserializing. As you can see, the name of the inner
1362//! `<key>` tag is used both as a map key / outer struct field name and as part
1363//! of the inner struct (although the _value_ of the tag, i.e. `key`, is not used
1364//! by it).
1365//!
1366//!
1367//!
1368//! Enum Representations
1369//! ====================
1370//!
1371//! `quick-xml` represents enums differently in normal fields, `$text` fields and
1372//! `$value` fields. The normal representation is compatible with serde's adjacent
1373//! and internal tags feature -- tags for adjacently and internally tagged enums
1374//! are serialized using [`Serializer::serialize_unit_variant`] and deserialized
1375//! using [`Deserializer::deserialize_enum`].
1376//!
1377//! Use these simple rules to remember how an enum is represented in XML:
1378//! - In a `$value` field the representation is always the same as the top-level representation;
1379//! - In a `$text` field the representation is always the same as in a normal field,
1380//!   but the surrounding tags with the field name are removed;
1381//! - In a normal field the representation always contains a tag with the field name.
1382//!
1383//! Normal enum variant
1384//! -------------------
1385//!
1386//! To model an `xs:choice` XML construct use `$value` field.
1387//! To model a top-level `xs:choice` just use the enum type.
1388//!
1389//! |Kind   |Top-level and in `$value` field          |In normal field      |In `$text` field     |
1390//! |-------|-----------------------------------------|---------------------|---------------------|
1391//! |Unit   |`<Unit/>`                                |`<field>Unit</field>`|`Unit`               |
1392//! |Newtype|`<Newtype>42</Newtype>`                  |Err(Custom) [^0]     |Err(Custom) [^0]     |
1393//! |Tuple  |`<Tuple>42</Tuple><Tuple>answer</Tuple>` |Err(Custom) [^0]     |Err(Custom) [^0]     |
1394//! |Struct |`<Struct><q>42</q><a>answer</a></Struct>`|Err(Custom) [^0]     |Err(Custom) [^0]     |
1395//!
1396//! `$text` enum variant
1397//! --------------------
1398//!
1399//! |Kind   |Top-level and in `$value` field          |In normal field      |In `$text` field     |
1400//! |-------|-----------------------------------------|---------------------|---------------------|
1401//! |Unit   |_(empty)_                                |`<field/>`           |_(empty)_            |
1402//! |Newtype|`42`                                     |Err(Custom) [^0] [^1]|Err(Custom) [^0] [^2]|
1403//! |Tuple  |`42 answer`                              |Err(Custom) [^0] [^3]|Err(Custom) [^0] [^4]|
1404//! |Struct |Err(Custom) [^0]                         |Err(Custom) [^0]     |Err(Custom) [^0]     |
1405//!
1406//! [^0]: The error is returned by the deserialized type. In the case of a derived implementation
1407//!       a `Custom` error will be returned, but a custom `Deserialize` implementation can successfully
1408//!       deserialize the value from the string which will be passed to it.
1409//!
1410//! [^1]: If this were serialized as `<field>42</field>`, it would be ambiguous during deserialization,
1411//!       because it would clash with the `Unit` representation in a normal field.
1412//!
1413//! [^2]: If this were serialized as `42`, it would be ambiguous during deserialization,
1414//!       because it would clash with the `Unit` representation in a `$text` field.
1415//!
1416//! [^3]: If this were serialized as `<field>42 answer</field>`, it would be ambiguous during deserialization,
1417//!       because it would clash with the `Unit` representation in a normal field.
1418//!
1419//! [^4]: If this were serialized as `42 answer`, it would be ambiguous during deserialization,
1420//!       because it would clash with the `Unit` representation in a `$text` field.
1421//!
1422//!
1423//!
1424//! `$text` and `$value` special names
1425//! ==================================
1426//!
1427//! quick-xml supports two special names for fields -- `$text` and `$value`.
1428//! Although they may seem the same, there is a distinction. Two different
1429//! names are required mostly for serialization, because quick-xml should know
1430//! how you want to serialize certain constructs, which could be represented
1431//! through XML in multiple different ways.
1432//!
1433//! The only difference is in how complex types and sequences are serialized.
1434//! If you doubt which one you should select, begin with [`$value`](#value).
1435//!
1436//! ## `$text`
1437//! `$text` is used when you want to write your XML as text or CDATA content.
1438//! More formally, a field with that name represents a simple type definition with
1439//! `{variety} = atomic` or `{variety} = union` whose basic members are all atomic,
1440//! as described in the [specification].
1441//!
1442//! As a result, not all types of such fields can be serialized. Serialization
1443//! of only the following types is supported:
1444//! - all primitive types (strings, numbers, booleans)
1445//! - unit variants of enumerations (serializes to a name of a variant)
1446//! - newtypes (delegates serialization to inner type)
1447//! - [`Option`] of above (`None` serializes to nothing)
1448//! - sequences (including tuples and tuple variants of enumerations) of above,
1449//!   excluding `None` and empty string elements (because it will not be possible
1450//!   to deserialize them back). The elements are separated by space(s)
1451//! - unit type `()` and unit structs (serializes to nothing)
1452//!
1453//! Complex types, such as structs and maps, are not supported in this field.
1454//! If you want them, you should use `$value`.
1455//!
1456//! Sequences are serialized to a space-delimited string, which is why only certain
1457//! types are allowed in this mode:
1458//!
1459//! ```
1460//! # use serde::{Deserialize, Serialize};
1461//! # use quick_xml::de::from_str;
1462//! # use quick_xml::se::to_string;
1463//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1464//! struct AnyName {
1465//!     #[serde(rename = "$text")]
1466//!     field: Vec<usize>,
1467//! }
1468//!
1469//! let obj = AnyName { field: vec![1, 2, 3] };
1470//! let xml = to_string(&obj).unwrap();
1471//! assert_eq!(xml, "<AnyName>1 2 3</AnyName>");
1472//!
1473//! let object: AnyName = from_str(&xml).unwrap();
1474//! assert_eq!(object, obj);
1475//! ```
1476//!
1477//! ## `$value`
1478//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1479//!
1480//! NOTE: the name `#content` would better explain the purpose of that field,
1481//! but `$value` is used for compatibility with other XML serde crates, which
1482//! use that name. This will allow you to switch XML crates more smoothly if required.
1483//! </div>
1484//!
1485//! The representation of primitive types in a `$value` field does not differ from their
1486//! representation in a `$text` field. The difference is how sequences are serialized.
1487//! `$value` serializes each sequence item as a separate XML element. The name
1488//! of that element is taken from the serialized type, and because only `enum`s provide
1489//! such a name (their variant name), only they should be used for such fields.
1490//!
1491//! `$value` fields do not support `struct` types with fields; the serialization
1492//! of such types would end with an `Err(Unsupported)`. Unit structs and the unit
1493//! type `()` serialize to nothing and can be deserialized from any content.
1494//!
1495//! Serialization and deserialization of a `$value` field is performed as usual, except
1496//! that the name for an XML element will be given by the serialized type instead of
1497//! the field. This allows serializing enumerated types, where the variant is encoded
1498//! as a tag name, and so representing an XSD `xs:choice` schema by a Rust `enum`.
1499//!
1500//! In the example below, `field` will be serialized as `<field>A</field>`,
1501//! `<field>B</field>` or `<field>C</field>`: the element gets its name from the
1502//! field name, and the variant name of `Enum` becomes the text content of that element:
1503//!
1504//! ```
1505//! # use serde::{Deserialize, Serialize};
1506//! # use pretty_assertions::assert_eq;
1507//! # #[derive(PartialEq, Debug)]
1508//! #[derive(Deserialize, Serialize)]
1509//! enum Enum { A, B, C }
1510//!
1511//! # #[derive(PartialEq, Debug)]
1512//! #[derive(Deserialize, Serialize)]
1513//! struct AnyName {
1514//!     // <field>A</field>, <field>B</field>, or <field>C</field>
1515//!     field: Enum,
1516//! }
1517//! # assert_eq!(
1518//! #     quick_xml::se::to_string(&AnyName { field: Enum::A }).unwrap(),
1519//! #     "<AnyName><field>A</field></AnyName>",
1520//! # );
1521//! # assert_eq!(
1522//! #     AnyName { field: Enum::B },
1523//! #     quick_xml::de::from_str("<root><field>B</field></root>").unwrap(),
1524//! # );
1525//! ```
1526//!
1527//! If you rename the field to `$value`, then `field` will be serialized as `<A/>`,
1528//! `<B/>` or `<C/>`, depending on its content. It is also possible to
1529//! deserialize it from the same elements:
1530//!
1531//! ```
1532//! # use serde::{Deserialize, Serialize};
1533//! # use pretty_assertions::assert_eq;
1534//! # #[derive(Deserialize, Serialize, PartialEq, Debug)]
1535//! # enum Enum { A, B, C }
1536//! #
1537//! # #[derive(PartialEq, Debug)]
1538//! #[derive(Deserialize, Serialize)]
1539//! struct AnyName {
1540//!     // <A/>, <B/> or <C/>
1541//!     #[serde(rename = "$value")]
1542//!     field: Enum,
1543//! }
1544//! # assert_eq!(
1545//! #     quick_xml::se::to_string(&AnyName { field: Enum::A }).unwrap(),
1546//! #     "<AnyName><A/></AnyName>",
1547//! # );
1548//! # assert_eq!(
1549//! #     AnyName { field: Enum::B },
1550//! #     quick_xml::de::from_str("<root><B/></root>").unwrap(),
1551//! # );
1552//! ```
1553//!
1554//! ### Primitives and sequences of primitives
1555//!
1556//! Sequences are serialized to a list of elements. Note that sequences of types that
1557//! do not produce their own tag (i.e. primitives) will produce [`SeError::Unsupported`]
1558//! if they contain more than one element, because such a sequence cannot be
1559//! deserialized to the same value:
1560//!
1561//! ```
1562//! # use serde::{Deserialize, Serialize};
1563//! # use pretty_assertions::assert_eq;
1564//! # use quick_xml::de::from_str;
1565//! # use quick_xml::se::to_string;
1566//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1567//! struct AnyName {
1568//!     #[serde(rename = "$value")]
1569//!     field: Vec<usize>,
1570//! }
1571//!
1572//! let obj = AnyName { field: vec![1, 2, 3] };
1573//! // If this object were serialized, it would be represented as "<AnyName>123</AnyName>"
1574//! to_string(&obj).unwrap_err();
1575//!
1576//! let object: AnyName = from_str("<AnyName>123</AnyName>").unwrap();
1577//! assert_eq!(object, AnyName { field: vec![123] });
1578//!
1579//! // `1 2 3` is mapped to a single `usize` element
1580//! // It is impossible to deserialize list of primitives to such field
1581//! from_str::<AnyName>("<AnyName>1 2 3</AnyName>").unwrap_err();
1582//! ```
1583//!
1584//! A particular case of that example is a string `$value` field, which is probably
1585//! the most common use of that attribute:
1586//!
1587//! ```
1588//! # use serde::{Deserialize, Serialize};
1589//! # use pretty_assertions::assert_eq;
1590//! # use quick_xml::de::from_str;
1591//! # use quick_xml::se::to_string;
1592//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1593//! struct AnyName {
1594//!     #[serde(rename = "$value")]
1595//!     field: String,
1596//! }
1597//!
1598//! let obj = AnyName { field: "content".to_string() };
1599//! let xml = to_string(&obj).unwrap();
1600//! assert_eq!(xml, "<AnyName>content</AnyName>");
1601//! ```
1602//!
1603//! ### Structs and sequences of structs
1604//!
1605//! Note that structures do not have a serializable name either (the name of the
1606//! type is never used), so it is impossible to serialize a non-unit struct or a
1607//! sequence of non-unit structs in a `$value` field. (Sequences of) unit structs
1608//! are serialized as an empty string, because units themselves serialize
1609//! to nothing:
1610//!
1611//! ```
1612//! # use serde::{Deserialize, Serialize};
1613//! # use pretty_assertions::assert_eq;
1614//! # use quick_xml::de::from_str;
1615//! # use quick_xml::se::to_string;
1616//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1617//! struct Unit;
1618//!
1619//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1620//! struct AnyName {
1621//!     // #[serde(default)] is required for deserialization of empty lists
1622//!     // This is a general note, not related to $value
1623//!     #[serde(rename = "$value", default)]
1624//!     field: Vec<Unit>,
1625//! }
1626//!
1627//! let obj = AnyName { field: vec![Unit, Unit, Unit] };
1628//! let xml = to_string(&obj).unwrap();
1629//! assert_eq!(xml, "<AnyName/>");
1630//!
1631//! let object: AnyName = from_str("<AnyName/>").unwrap();
1632//! assert_eq!(object, AnyName { field: vec![] });
1633//!
1634//! let object: AnyName = from_str("<AnyName></AnyName>").unwrap();
1635//! assert_eq!(object, AnyName { field: vec![] });
1636//!
1637//! let object: AnyName = from_str("<AnyName><A/><B/><C/></AnyName>").unwrap();
1638//! assert_eq!(object, AnyName { field: vec![Unit, Unit, Unit] });
1639//! ```
1640//!
1641//! ### Enums and sequences of enums
1642//!
1643//! Enumerations use the variant name as an element name:
1644//!
1645//! ```
1646//! # use serde::{Deserialize, Serialize};
1647//! # use pretty_assertions::assert_eq;
1648//! # use quick_xml::de::from_str;
1649//! # use quick_xml::se::to_string;
1650//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1651//! struct AnyName {
1652//!     #[serde(rename = "$value")]
1653//!     field: Vec<Enum>,
1654//! }
1655//!
1656//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1657//! enum Enum { A, B, C }
1658//!
1659//! let obj = AnyName { field: vec![Enum::A, Enum::B, Enum::C] };
1660//! let xml = to_string(&obj).unwrap();
1661//! assert_eq!(
1662//!     xml,
1663//!     "<AnyName>\
1664//!         <A/>\
1665//!         <B/>\
1666//!         <C/>\
1667//!      </AnyName>"
1668//! );
1669//!
1670//! let object: AnyName = from_str(&xml).unwrap();
1671//! assert_eq!(object, obj);
1672//! ```
1673//!
1674//! ----------------------------------------------------------------------------
1675//!
1676//! You can have either `$text` or `$value` field in your structs. Unfortunately,
1677//! that is not enforced, so you can theoretically have both, but you should
1678//! avoid that.
1679//!
1680//!
1681//!
1682//! Frequently Used Patterns
1683//! ========================
1684//!
1685//! Some XML constructs are used so frequently that it is worth documenting the recommended
1686//! way to represent them in Rust. The sections below describe them.
1687//!
1688//! `<element>` lists
1689//! -----------------
1690//! Many XML formats wrap lists of elements in an additional container,
1691//! although this is not required by the XML rules:
1692//!
1693//! ```xml
1694//! <root>
1695//!   <field1/>
1696//!   <field2/>
1697//!   <list><!-- Container -->
1698//!     <element/>
1699//!     <element/>
1700//!     <element/>
1701//!   </list>
1702//!   <field3/>
1703//! </root>
1704//! ```
1705//! In this case, there is a great desire to describe this XML in this way:
1706//! ```
1707//! /// Represents <element/>
1708//! type Element = ();
1709//!
1710//! /// Represents <root>...</root>
1711//! struct AnyName {
1712//!     // Incorrect
1713//!     list: Vec<Element>,
1714//! }
1715//! ```
1716//! This will not work, because potentially `<list>` element can have attributes
1717//! and other elements inside. You should define the struct for the `<list>`
1718//! explicitly, as you do that in the XSD for that XML:
1719//! ```
1720//! /// Represents <element/>
1721//! type Element = ();
1722//!
1723//! /// Represents <root>...</root>
1724//! struct AnyName {
1725//!     // Correct
1726//!     list: List,
1727//! }
1728//! /// Represents <list>...</list>
1729//! struct List {
1730//!     element: Vec<Element>,
1731//! }
1732//! ```
1733//!
1734//! If you want to simplify your API, you could write a simple function for unwrapping
1735//! the inner list and apply it via [`deserialize_with`]:
1736//!
1737//! ```
1738//! # use pretty_assertions::assert_eq;
1739//! use quick_xml::de::from_str;
1740//! use serde::{Deserialize, Deserializer};
1741//!
1742//! /// Represents <element/>
1743//! type Element = ();
1744//!
1745//! /// Represents <root>...</root>
1746//! #[derive(Deserialize, Debug, PartialEq)]
1747//! struct AnyName {
1748//!     #[serde(deserialize_with = "unwrap_list")]
1749//!     list: Vec<Element>,
1750//! }
1751//!
1752//! fn unwrap_list<'de, D>(deserializer: D) -> Result<Vec<Element>, D::Error>
1753//! where
1754//!     D: Deserializer<'de>,
1755//! {
1756//!     /// Represents <list>...</list>
1757//!     #[derive(Deserialize)]
1758//!     struct List {
1759//!         // default allows empty list
1760//!         #[serde(default)]
1761//!         element: Vec<Element>,
1762//!     }
1763//!     Ok(List::deserialize(deserializer)?.element)
1764//! }
1765//!
1766//! assert_eq!(
1767//!     AnyName { list: vec![(), (), ()] },
1768//!     from_str("
1769//!         <root>
1770//!           <list>
1771//!             <element/>
1772//!             <element/>
1773//!             <element/>
1774//!           </list>
1775//!         </root>
1776//!     ").unwrap(),
1777//! );
1778//! ```
1779//!
//! Instead of writing such functions manually, you could also try <https://lib.rs/crates/serde-query>.
1781//!
1782//! Overlapped (Out-of-Order) Elements
1783//! ----------------------------------
//! If the elements of a list may be overlapped (interleaved) with elements
//! that do not belong to that list (which is a usual case in XML documents),
//! like this:
1787//! ```xml
1788//! <any-name>
1789//!   <item/>
1790//!   <another-item/>
1791//!   <item/>
1792//!   <item/>
1793//! </any-name>
1794//! ```
1795//! you should enable the [`overlapped-lists`] feature to make it possible
1796//! to deserialize this to:
1797//! ```no_run
1798//! # use serde::Deserialize;
1799//! #[derive(Deserialize)]
1800//! #[serde(rename_all = "kebab-case")]
1801//! struct AnyName {
1802//!     item: Vec<()>,
1803//!     another_item: (),
1804//! }
1805//! ```
1806//!
1807//!
1808//! Internally Tagged Enums
1809//! -----------------------
//! [Tagged enums] are currently not supported because of an issue in the Serde
//! design (see [serde#1183] and [quick-xml#586]) and missing optimizations in
//! Serde which could be useful for XML parsing ([serde#1495]). This can be worked
//! around by manually implementing deserialization with
//! `#[serde(deserialize_with = "func")]` or by implementing [`Deserialize`], but
//! this gets tedious very quickly for files with a large number of tagged enums.
//! To help with this issue, quick-xml provides the macro
//! [`impl_deserialize_for_internally_tagged_enum!`]. See the macro documentation
//! for details.
1818//!
1819//!
1820//! [`overlapped-lists`]: ../index.html#overlapped-lists
1821//! [specification]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition
1822//! [`deserialize_with`]: https://serde.rs/field-attrs.html#deserialize_with
1823//! [#497]: https://github.com/tafia/quick-xml/issues/497
1824//! [`Serializer::serialize_unit_variant`]: serde::Serializer::serialize_unit_variant
1825//! [`Deserializer::deserialize_enum`]: serde::Deserializer::deserialize_enum
1826//! [`SeError::Unsupported`]: crate::errors::serialize::SeError::Unsupported
1827//! [Tagged enums]: https://serde.rs/enum-representations.html#internally-tagged
1828//! [serde#1183]: https://github.com/serde-rs/serde/issues/1183
1829//! [serde#1495]: https://github.com/serde-rs/serde/issues/1495
1830//! [quick-xml#586]: https://github.com/tafia/quick-xml/issues/586
1831//! [`impl_deserialize_for_internally_tagged_enum!`]: crate::impl_deserialize_for_internally_tagged_enum
1832
// Macros should be defined before the modules that use them.
// Also, macros should be imported before they are used.
1835use serde::serde_if_integer128;
1836
/// Generates a `deserialize_*` method for one numeric type.
///
/// The generated method reads the text via `read_string()` and tries to parse
/// it into the type accepted by `$visit`. When the text cannot be parsed, it
/// is handed to the visitor as a string instead, so the visitor decides how
/// to report (or accept) it.
macro_rules! deserialize_num {
    ($deserialize:ident => $visit:ident, $($mut:tt)?) => {
        fn $deserialize<V: Visitor<'de>>($($mut)? self, visitor: V) -> Result<V::Value, DeError> {
            // No need to unescape because valid number representations cannot be escaped
            let raw = self.read_string()?;
            if let Ok(number) = raw.parse() {
                return visitor.$visit(number);
            }
            // Not a number: pass the text itself to the visitor
            match raw {
                Cow::Borrowed(s) => visitor.visit_str(s),
                Cow::Owned(s) => visitor.visit_string(s),
            }
        }
    };
}
1855
/// Generates the `deserialize_*` methods for scalar types (numbers, booleans,
/// characters, strings, byte arrays and identifiers), plus the methods that
/// merely forward to another `deserialize_*` method.
///
/// The optional token (typically `mut`) is placed before `self` in the
/// methods that read the text themselves.
macro_rules! deserialize_primitives {
    ($($mut:tt)?) => {
        // Signed integers
        deserialize_num!(deserialize_i8 => visit_i8, $($mut)?);
        deserialize_num!(deserialize_i16 => visit_i16, $($mut)?);
        deserialize_num!(deserialize_i32 => visit_i32, $($mut)?);
        deserialize_num!(deserialize_i64 => visit_i64, $($mut)?);

        // Unsigned integers
        deserialize_num!(deserialize_u8 => visit_u8, $($mut)?);
        deserialize_num!(deserialize_u16 => visit_u16, $($mut)?);
        deserialize_num!(deserialize_u32 => visit_u32, $($mut)?);
        deserialize_num!(deserialize_u64 => visit_u64, $($mut)?);

        // 128-bit integers
        serde_if_integer128! {
            deserialize_num!(deserialize_i128 => visit_i128, $($mut)?);
            deserialize_num!(deserialize_u128 => visit_u128, $($mut)?);
        }

        // Floating point numbers
        deserialize_num!(deserialize_f32 => visit_f32, $($mut)?);
        deserialize_num!(deserialize_f64 => visit_f64, $($mut)?);

        fn deserialize_bool<V: Visitor<'de>>($($mut)? self, visitor: V) -> Result<V::Value, DeError> {
            // Wrap the text so that the boolean parsing of `CowRef` can be reused
            let value = match self.read_string()? {
                Cow::Borrowed(input) => CowRef::Input(input),
                Cow::Owned(owned) => CowRef::Owned(owned),
            };
            value.deserialize_bool(visitor)
        }

        /// A character is deserialized the same way as a [string](#method.deserialize_str).
        #[inline]
        fn deserialize_char<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, DeError> {
            self.deserialize_str(visitor)
        }

        fn deserialize_str<V: Visitor<'de>>($($mut)? self, visitor: V) -> Result<V::Value, DeError> {
            // Text borrowed from the input is passed on as borrowed,
            // otherwise ownership of the string is given to the visitor
            match self.read_string()? {
                Cow::Borrowed(borrowed) => visitor.visit_borrowed_str(borrowed),
                Cow::Owned(owned) => visitor.visit_string(owned),
            }
        }

        /// An owned string is deserialized the same way as a [non-owned](#method.deserialize_str) one.
        #[inline]
        fn deserialize_string<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, DeError> {
            self.deserialize_str(visitor)
        }

        /// Delegates to [`deserialize_any`](#method.deserialize_any).
        #[inline]
        fn deserialize_bytes<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, DeError> {
            self.deserialize_any(visitor)
        }

        /// Delegates to [`deserialize_bytes`](#method.deserialize_bytes).
        #[inline]
        fn deserialize_byte_buf<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, DeError> {
            self.deserialize_bytes(visitor)
        }

        /// A named unit is deserialized the same way as an [unnamed unit](#method.deserialize_unit);
        /// the name is ignored.
        #[inline]
        fn deserialize_unit_struct<V: Visitor<'de>>(
            self,
            _name: &'static str,
            visitor: V,
        ) -> Result<V::Value, DeError> {
            self.deserialize_unit(visitor)
        }

        /// A tuple is deserialized the same way as a [sequence](#method.deserialize_seq);
        /// the length is ignored.
        #[inline]
        fn deserialize_tuple<V: Visitor<'de>>(
            self,
            _len: usize,
            visitor: V,
        ) -> Result<V::Value, DeError> {
            self.deserialize_seq(visitor)
        }

        /// A named tuple is deserialized the same way as an [unnamed tuple](#method.deserialize_tuple);
        /// the name is ignored.
        #[inline]
        fn deserialize_tuple_struct<V: Visitor<'de>>(
            self,
            _name: &'static str,
            len: usize,
            visitor: V,
        ) -> Result<V::Value, DeError> {
            self.deserialize_tuple(len, visitor)
        }

        /// Delegates to [`deserialize_struct`](#method.deserialize_struct),
        /// passing an empty name and an empty list of fields.
        #[inline]
        fn deserialize_map<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, DeError> {
            self.deserialize_struct("", &[], visitor)
        }

        /// An identifier is deserialized the same way as a [string](#method.deserialize_str).
        #[inline]
        fn deserialize_identifier<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, DeError> {
            self.deserialize_str(visitor)
        }

        /// Delegates to [`deserialize_unit`](#method.deserialize_unit).
        #[inline]
        fn deserialize_ignored_any<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, DeError> {
            self.deserialize_unit(visitor)
        }
    };
}
2001
2002mod key;
2003mod map;
2004mod resolver;
2005mod simple_type;
2006mod text;
2007mod var;
2008
2009pub use self::resolver::{EntityResolver, PredefinedEntityResolver};
2010pub use self::simple_type::SimpleTypeDeserializer;
2011pub use crate::errors::serialize::DeError;
2012
2013use crate::{
2014    de::map::ElementMapAccess,
2015    encoding::Decoder,
2016    errors::Error,
2017    events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
2018    name::QName,
2019    reader::Reader,
2020    utils::CowRef,
2021};
2022use serde::de::{
2023    self, Deserialize, DeserializeOwned, DeserializeSeed, IntoDeserializer, SeqAccess, Visitor,
2024};
2025use std::borrow::Cow;
2026#[cfg(feature = "overlapped-lists")]
2027use std::collections::VecDeque;
2028use std::io::BufRead;
2029use std::mem::replace;
2030#[cfg(feature = "overlapped-lists")]
2031use std::num::NonZeroUsize;
2032use std::ops::Deref;
2033
/// Special name for the data represented by a text node or a CDATA node.
/// XML markup is not expected there
pub(crate) const TEXT_KEY: &str = "$text";
/// Special name for the data represented by any XML markup inside
pub(crate) const VALUE_KEY: &str = "$value";
2038
/// The decoded and concatenated content of consecutive [`Text`] and [`CData`]
/// events. Events are _consecutive_ when they follow each other directly or
/// are separated only by (any number of) [`Comment`] or [`PI`] events.
///
/// The text is stored internally in a `Cow<str>`: cloning is cheap while the
/// text is borrowed, and copies the data when it is owned.
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
/// [`Comment`]: Event::Comment
/// [`PI`]: Event::PI
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Text<'a> {
    text: Cow<'a, str>,
}
2054
2055impl<'a> Deref for Text<'a> {
2056    type Target = str;
2057
2058    #[inline]
2059    fn deref(&self) -> &Self::Target {
2060        self.text.deref()
2061    }
2062}
2063
2064impl<'a> From<&'a str> for Text<'a> {
2065    #[inline]
2066    fn from(text: &'a str) -> Self {
2067        Self {
2068            text: Cow::Borrowed(text),
2069        }
2070    }
2071}
2072
2073impl<'a> From<String> for Text<'a> {
2074    #[inline]
2075    fn from(text: String) -> Self {
2076        Self {
2077            text: Cow::Owned(text),
2078        }
2079    }
2080}
2081
2082impl<'a> From<Cow<'a, str>> for Text<'a> {
2083    #[inline]
2084    fn from(text: Cow<'a, str>) -> Self {
2085        Self { text }
2086    }
2087}
2088
2089////////////////////////////////////////////////////////////////////////////////////////////////////
2090
2091/// Simplified event which contains only these variants that used by deserializer
2092#[derive(Clone, Debug, PartialEq, Eq)]
2093pub enum DeEvent<'a> {
2094    /// Start tag (with attributes) `<tag attr="value">`.
2095    Start(BytesStart<'a>),
2096    /// End tag `</tag>`.
2097    End(BytesEnd<'a>),
2098    /// Decoded and concatenated content of consequent [`Text`] and [`CData`]
2099    /// events. _Consequent_ means that events should follow each other or be
2100    /// delimited only by (any count of) [`Comment`] or [`PI`] events.
2101    ///
2102    /// [`Text`]: Event::Text
2103    /// [`CData`]: Event::CData
2104    /// [`Comment`]: Event::Comment
2105    /// [`PI`]: Event::PI
2106    Text(Text<'a>),
2107    /// End of XML document.
2108    Eof,
2109}
2110
2111////////////////////////////////////////////////////////////////////////////////////////////////////
2112
2113/// Simplified event which contains only these variants that used by deserializer,
2114/// but [`Text`] events not yet fully processed.
2115///
2116/// [`Text`] events should be trimmed if they does not surrounded by the other
2117/// [`Text`] or [`CData`] events. This event contains intermediate state of [`Text`]
2118/// event, where they are trimmed from the start, but not from the end. To trim
2119/// end spaces we should lookahead by one deserializer event (i. e. skip all
2120/// comments and processing instructions).
2121///
2122/// [`Text`]: Event::Text
2123/// [`CData`]: Event::CData
2124#[derive(Clone, Debug, PartialEq, Eq)]
2125pub enum PayloadEvent<'a> {
2126    /// Start tag (with attributes) `<tag attr="value">`.
2127    Start(BytesStart<'a>),
2128    /// End tag `</tag>`.
2129    End(BytesEnd<'a>),
2130    /// Escaped character data between tags.
2131    Text(BytesText<'a>),
2132    /// Unescaped character data stored in `<![CDATA[...]]>`.
2133    CData(BytesCData<'a>),
2134    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
2135    DocType(BytesText<'a>),
2136    /// End of XML document.
2137    Eof,
2138}
2139
2140impl<'a> PayloadEvent<'a> {
2141    /// Ensures that all data is owned to extend the object's lifetime if necessary.
2142    #[inline]
2143    fn into_owned(self) -> PayloadEvent<'static> {
2144        match self {
2145            PayloadEvent::Start(e) => PayloadEvent::Start(e.into_owned()),
2146            PayloadEvent::End(e) => PayloadEvent::End(e.into_owned()),
2147            PayloadEvent::Text(e) => PayloadEvent::Text(e.into_owned()),
2148            PayloadEvent::CData(e) => PayloadEvent::CData(e.into_owned()),
2149            PayloadEvent::DocType(e) => PayloadEvent::DocType(e.into_owned()),
2150            PayloadEvent::Eof => PayloadEvent::Eof,
2151        }
2152    }
2153}
2154
2155/// An intermediate reader that consumes [`PayloadEvent`]s and produces final [`DeEvent`]s.
2156/// [`PayloadEvent::Text`] events, that followed by any event except
2157/// [`PayloadEvent::Text`] or [`PayloadEvent::CData`], are trimmed from the end.
2158struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = PredefinedEntityResolver> {
2159    /// A source of low-level XML events
2160    reader: R,
2161    /// Intermediate event, that could be returned by the next call to `next()`.
2162    /// If that is the `Text` event then leading spaces already trimmed, but
2163    /// trailing spaces is not. Before the event will be returned, trimming of
2164    /// the spaces could be necessary
2165    lookahead: Result<PayloadEvent<'i>, DeError>,
2166
2167    /// Used to resolve unknown entities that would otherwise cause the parser
2168    /// to return an [`EscapeError::UnrecognizedEntity`] error.
2169    ///
2170    /// [`EscapeError::UnrecognizedEntity`]: crate::escape::EscapeError::UnrecognizedEntity
2171    entity_resolver: E,
2172}
2173
2174impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
2175    fn new(mut reader: R, entity_resolver: E) -> Self {
2176        // Lookahead by one event immediately, so we do not need to check in the
2177        // loop if we need lookahead or not
2178        let lookahead = reader.next();
2179
2180        Self {
2181            reader,
2182            lookahead,
2183            entity_resolver,
2184        }
2185    }
2186
2187    /// Returns `true` if all events was consumed
2188    const fn is_empty(&self) -> bool {
2189        matches!(self.lookahead, Ok(PayloadEvent::Eof))
2190    }
2191
2192    /// Read next event and put it in lookahead, return the current lookahead
2193    #[inline(always)]
2194    fn next_impl(&mut self) -> Result<PayloadEvent<'i>, DeError> {
2195        replace(&mut self.lookahead, self.reader.next())
2196    }
2197
2198    /// Returns `true` when next event is not a text event in any form.
2199    #[inline(always)]
2200    const fn current_event_is_last_text(&self) -> bool {
2201        // If next event is a text or CDATA, we should not trim trailing spaces
2202        !matches!(
2203            self.lookahead,
2204            Ok(PayloadEvent::Text(_)) | Ok(PayloadEvent::CData(_))
2205        )
2206    }
2207
2208    /// Read all consequent [`Text`] and [`CData`] events until non-text event
2209    /// occurs. Content of all events would be appended to `result` and returned
2210    /// as [`DeEvent::Text`].
2211    ///
2212    /// [`Text`]: PayloadEvent::Text
2213    /// [`CData`]: PayloadEvent::CData
2214    fn drain_text(&mut self, mut result: Cow<'i, str>) -> Result<DeEvent<'i>, DeError> {
2215        loop {
2216            if self.current_event_is_last_text() {
2217                break;
2218            }
2219
2220            match self.next_impl()? {
2221                PayloadEvent::Text(mut e) => {
2222                    if self.current_event_is_last_text() {
2223                        // FIXME: Actually, we should trim after decoding text, but now we trim before
2224                        e.inplace_trim_end();
2225                    }
2226                    result
2227                        .to_mut()
2228                        .push_str(&e.unescape_with(|entity| self.entity_resolver.resolve(entity))?);
2229                }
2230                PayloadEvent::CData(e) => result.to_mut().push_str(&e.decode()?),
2231
2232                // SAFETY: current_event_is_last_text checks that event is Text or CData
2233                _ => unreachable!("Only `Text` and `CData` events can come here"),
2234            }
2235        }
2236        Ok(DeEvent::Text(Text { text: result }))
2237    }
2238
2239    /// Return an input-borrowing event.
2240    fn next(&mut self) -> Result<DeEvent<'i>, DeError> {
2241        loop {
2242            return match self.next_impl()? {
2243                PayloadEvent::Start(e) => Ok(DeEvent::Start(e)),
2244                PayloadEvent::End(e) => Ok(DeEvent::End(e)),
2245                PayloadEvent::Text(mut e) => {
2246                    if self.current_event_is_last_text() && e.inplace_trim_end() {
2247                        // FIXME: Actually, we should trim after decoding text, but now we trim before
2248                        continue;
2249                    }
2250                    self.drain_text(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
2251                }
2252                PayloadEvent::CData(e) => self.drain_text(e.decode()?),
2253                PayloadEvent::DocType(e) => {
2254                    self.entity_resolver
2255                        .capture(e)
2256                        .map_err(|err| DeError::Custom(format!("cannot parse DTD: {}", err)))?;
2257                    continue;
2258                }
2259                PayloadEvent::Eof => Ok(DeEvent::Eof),
2260            };
2261        }
2262    }
2263
2264    #[inline]
2265    fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
2266        match self.lookahead {
2267            // We pre-read event with the same name that is required to be skipped.
2268            // First call of `read_to_end` will end out pre-read event, the second
2269            // will consume other events
2270            Ok(PayloadEvent::Start(ref e)) if e.name() == name => {
2271                let result1 = self.reader.read_to_end(name);
2272                let result2 = self.reader.read_to_end(name);
2273
2274                // In case of error `next_impl` returns `Eof`
2275                let _ = self.next_impl();
2276                result1?;
2277                result2?;
2278            }
2279            // We pre-read event with the same name that is required to be skipped.
2280            // Because this is end event, we already consume the whole tree, so
2281            // nothing to do, just update lookahead
2282            Ok(PayloadEvent::End(ref e)) if e.name() == name => {
2283                let _ = self.next_impl();
2284            }
2285            Ok(_) => {
2286                let result = self.reader.read_to_end(name);
2287
2288                // In case of error `next_impl` returns `Eof`
2289                let _ = self.next_impl();
2290                result?;
2291            }
2292            // Read next lookahead event, unpack error from the current lookahead
2293            Err(_) => {
2294                self.next_impl()?;
2295            }
2296        }
2297        Ok(())
2298    }
2299
2300    #[inline]
2301    fn decoder(&self) -> Decoder {
2302        self.reader.decoder()
2303    }
2304}
2305
2306////////////////////////////////////////////////////////////////////////////////////////////////////
2307
2308/// Deserialize an instance of type `T` from a string of XML text.
2309pub fn from_str<'de, T>(s: &'de str) -> Result<T, DeError>
2310where
2311    T: Deserialize<'de>,
2312{
2313    let mut de = Deserializer::from_str(s);
2314    T::deserialize(&mut de)
2315}
2316
2317/// Deserialize from a reader. This method will do internal copies of data
2318/// read from `reader`. If you want have a `&str` input and want to borrow
2319/// as much as possible, use [`from_str`].
2320pub fn from_reader<R, T>(reader: R) -> Result<T, DeError>
2321where
2322    R: BufRead,
2323    T: DeserializeOwned,
2324{
2325    let mut de = Deserializer::from_reader(reader);
2326    T::deserialize(&mut de)
2327}
2328
2329////////////////////////////////////////////////////////////////////////////////////////////////////
2330
2331/// A structure that deserializes XML into Rust values.
2332pub struct Deserializer<'de, R, E: EntityResolver = PredefinedEntityResolver>
2333where
2334    R: XmlRead<'de>,
2335{
2336    /// An XML reader that streams events into this deserializer
2337    reader: XmlReader<'de, R, E>,
2338
2339    /// When deserializing sequences sometimes we have to skip unwanted events.
2340    /// That events should be stored and then replayed. This is a replay buffer,
2341    /// that streams events while not empty. When it exhausted, events will
2342    /// requested from [`Self::reader`].
2343    #[cfg(feature = "overlapped-lists")]
2344    read: VecDeque<DeEvent<'de>>,
2345    /// When deserializing sequences sometimes we have to skip events, because XML
2346    /// is tolerant to elements order and even if in the XSD order is strictly
2347    /// specified (using `xs:sequence`) most of XML parsers allows order violations.
2348    /// That means, that elements, forming a sequence, could be overlapped with
2349    /// other elements, do not related to that sequence.
2350    ///
2351    /// In order to support this, deserializer will scan events and skip unwanted
2352    /// events, store them here. After call [`Self::start_replay()`] all events
2353    /// moved from this to [`Self::read`].
2354    #[cfg(feature = "overlapped-lists")]
2355    write: VecDeque<DeEvent<'de>>,
2356    /// Maximum number of events that can be skipped when processing sequences
2357    /// that occur out-of-order. This field is used to prevent potential
2358    /// denial-of-service (DoS) attacks which could cause infinite memory
2359    /// consumption when parsing a very large amount of XML into a sequence field.
2360    #[cfg(feature = "overlapped-lists")]
2361    limit: Option<NonZeroUsize>,
2362
2363    #[cfg(not(feature = "overlapped-lists"))]
2364    peek: Option<DeEvent<'de>>,
2365
2366    /// Buffer to store attribute name as a field name exposed to serde consumers
2367    key_buf: String,
2368}
2369
2370impl<'de, R, E> Deserializer<'de, R, E>
2371where
2372    R: XmlRead<'de>,
2373    E: EntityResolver,
2374{
2375    /// Create an XML deserializer from one of the possible quick_xml input sources.
2376    ///
2377    /// Typically it is more convenient to use one of these methods instead:
2378    ///
2379    ///  - [`Deserializer::from_str`]
2380    ///  - [`Deserializer::from_reader`]
2381    fn new(reader: R, entity_resolver: E) -> Self {
2382        Self {
2383            reader: XmlReader::new(reader, entity_resolver),
2384
2385            #[cfg(feature = "overlapped-lists")]
2386            read: VecDeque::new(),
2387            #[cfg(feature = "overlapped-lists")]
2388            write: VecDeque::new(),
2389            #[cfg(feature = "overlapped-lists")]
2390            limit: None,
2391
2392            #[cfg(not(feature = "overlapped-lists"))]
2393            peek: None,
2394
2395            key_buf: String::new(),
2396        }
2397    }
2398
2399    /// Returns `true` if all events was consumed.
2400    pub fn is_empty(&self) -> bool {
2401        #[cfg(feature = "overlapped-lists")]
2402        if self.read.is_empty() {
2403            return self.reader.is_empty();
2404        }
2405        #[cfg(not(feature = "overlapped-lists"))]
2406        if self.peek.is_none() {
2407            return self.reader.is_empty();
2408        }
2409        false
2410    }
2411
2412    /// Returns the underlying XML reader.
2413    ///
2414    /// ```
2415    /// # use pretty_assertions::assert_eq;
2416    /// use serde::Deserialize;
2417    /// use quick_xml::de::Deserializer;
2418    /// use quick_xml::Reader;
2419    ///
2420    /// #[derive(Deserialize)]
2421    /// struct SomeStruct {
2422    ///     field1: String,
2423    ///     field2: String,
2424    /// }
2425    ///
2426    /// // Try to deserialize from broken XML
2427    /// let mut de = Deserializer::from_str(
2428    ///     "<SomeStruct><field1><field2></SomeStruct>"
2429    /// //   0                           ^= 28        ^= 41
2430    /// );
2431    ///
2432    /// let err = SomeStruct::deserialize(&mut de);
2433    /// assert!(err.is_err());
2434    ///
2435    /// let reader: &Reader<_> = de.get_ref().get_ref();
2436    ///
2437    /// assert_eq!(reader.error_position(), 28);
2438    /// assert_eq!(reader.buffer_position(), 41);
2439    /// ```
2440    pub const fn get_ref(&self) -> &R {
2441        &self.reader.reader
2442    }
2443
2444    /// Set the maximum number of events that could be skipped during deserialization
2445    /// of sequences.
2446    ///
2447    /// If `<element>` contains more than specified nested elements, `$text` or
2448    /// CDATA nodes, then [`DeError::TooManyEvents`] will be returned during
2449    /// deserialization of sequence field (any type that uses [`deserialize_seq`]
2450    /// for the deserialization, for example, `Vec<T>`).
2451    ///
2452    /// This method can be used to prevent a [DoS] attack and infinite memory
2453    /// consumption when parsing a very large XML to a sequence field.
2454    ///
2455    /// It is strongly recommended to set limit to some value when you parse data
2456    /// from untrusted sources. You should choose a value that your typical XMLs
2457    /// can have _between_ different elements that corresponds to the same sequence.
2458    ///
2459    /// # Examples
2460    ///
2461    /// Let's imagine, that we deserialize such structure:
2462    /// ```
2463    /// struct List {
2464    ///   item: Vec<()>,
2465    /// }
2466    /// ```
2467    ///
2468    /// The XML that we try to parse look like this:
2469    /// ```xml
2470    /// <any-name>
2471    ///   <item/>
2472    ///   <!-- Bufferization starts at this point -->
2473    ///   <another-item>
2474    ///     <some-element>with text</some-element>
2475    ///     <yet-another-element/>
2476    ///   </another-item>
2477    ///   <!-- Buffer will be emptied at this point; 7 events were buffered -->
2478    ///   <item/>
2479    ///   <!-- There is nothing to buffer, because elements follows each other -->
2480    ///   <item/>
2481    /// </any-name>
2482    /// ```
2483    ///
2484    /// There, when we deserialize the `item` field, we need to buffer 7 events,
2485    /// before we can deserialize the second `<item/>`:
2486    ///
2487    /// - `<another-item>`
2488    /// - `<some-element>`
2489    /// - `$text(with text)`
2490    /// - `</some-element>`
2491    /// - `<yet-another-element/>` (virtual start event)
2492    /// - `<yet-another-element/>` (virtual end event)
2493    /// - `</another-item>`
2494    ///
2495    /// Note, that `<yet-another-element/>` internally represented as 2 events:
2496    /// one for the start tag and one for the end tag. In the future this can be
2497    /// eliminated, but for now we use [auto-expanding feature] of a reader,
2498    /// because this simplifies deserializer code.
2499    ///
2500    /// [`deserialize_seq`]: serde::Deserializer::deserialize_seq
2501    /// [DoS]: https://en.wikipedia.org/wiki/Denial-of-service_attack
2502    /// [auto-expanding feature]: crate::reader::Config::expand_empty_elements
2503    #[cfg(feature = "overlapped-lists")]
2504    pub fn event_buffer_size(&mut self, limit: Option<NonZeroUsize>) -> &mut Self {
2505        self.limit = limit;
2506        self
2507    }
2508
2509    #[cfg(feature = "overlapped-lists")]
2510    fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
2511        if self.read.is_empty() {
2512            self.read.push_front(self.reader.next()?);
2513        }
2514        if let Some(event) = self.read.front() {
2515            return Ok(event);
2516        }
2517        // SAFETY: `self.read` was filled in the code above.
2518        // NOTE: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
2519        // if unsafe code will be allowed
2520        unreachable!()
2521    }
2522    #[cfg(not(feature = "overlapped-lists"))]
2523    fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
2524        if self.peek.is_none() {
2525            self.peek = Some(self.reader.next()?);
2526        }
2527        match self.peek.as_ref() {
2528            Some(v) => Ok(v),
2529            // SAFETY: a `None` variant for `self.peek` would have been replaced
2530            // by a `Some` variant in the code above.
2531            // TODO: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
2532            // if unsafe code will be allowed
2533            None => unreachable!(),
2534        }
2535    }
2536
2537    fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
2538        // Replay skipped or peeked events
2539        #[cfg(feature = "overlapped-lists")]
2540        if let Some(event) = self.read.pop_front() {
2541            return Ok(event);
2542        }
2543        #[cfg(not(feature = "overlapped-lists"))]
2544        if let Some(e) = self.peek.take() {
2545            return Ok(e);
2546        }
2547        self.reader.next()
2548    }
2549
2550    /// Returns the mark after which all events, skipped by [`Self::skip()`] call,
2551    /// should be replayed after calling [`Self::start_replay()`].
2552    #[cfg(feature = "overlapped-lists")]
2553    #[inline]
2554    #[must_use = "returned checkpoint should be used in `start_replay`"]
2555    fn skip_checkpoint(&self) -> usize {
2556        self.write.len()
2557    }
2558
2559    /// Extracts XML tree of events from and stores them in the skipped events
2560    /// buffer from which they can be retrieved later. You MUST call
2561    /// [`Self::start_replay()`] after calling this to give access to the skipped
2562    /// events and release internal buffers.
2563    #[cfg(feature = "overlapped-lists")]
2564    fn skip(&mut self) -> Result<(), DeError> {
2565        let event = self.next()?;
2566        self.skip_event(event)?;
2567        match self.write.back() {
2568            // Skip all subtree, if we skip a start event
2569            Some(DeEvent::Start(e)) => {
2570                let end = e.name().as_ref().to_owned();
2571                let mut depth = 0;
2572                loop {
2573                    let event = self.next()?;
2574                    match event {
2575                        DeEvent::Start(ref e) if e.name().as_ref() == end => {
2576                            self.skip_event(event)?;
2577                            depth += 1;
2578                        }
2579                        DeEvent::End(ref e) if e.name().as_ref() == end => {
2580                            self.skip_event(event)?;
2581                            if depth == 0 {
2582                                break;
2583                            }
2584                            depth -= 1;
2585                        }
2586                        DeEvent::Eof => {
2587                            self.skip_event(event)?;
2588                            break;
2589                        }
2590                        _ => self.skip_event(event)?,
2591                    }
2592                }
2593            }
2594            _ => (),
2595        }
2596        Ok(())
2597    }
2598
2599    #[cfg(feature = "overlapped-lists")]
2600    #[inline]
2601    fn skip_event(&mut self, event: DeEvent<'de>) -> Result<(), DeError> {
2602        if let Some(max) = self.limit {
2603            if self.write.len() >= max.get() {
2604                return Err(DeError::TooManyEvents(max));
2605            }
2606        }
2607        self.write.push_back(event);
2608        Ok(())
2609    }
2610
2611    /// Moves buffered events, skipped after given `checkpoint` from [`Self::write`]
2612    /// skip buffer to [`Self::read`] buffer.
2613    ///
2614    /// After calling this method, [`Self::peek()`] and [`Self::next()`] starts
2615    /// return events that was skipped previously by calling [`Self::skip()`],
2616    /// and only when all that events will be consumed, the deserializer starts
2617    /// to drain events from underlying reader.
2618    ///
2619    /// This method MUST be called if any number of [`Self::skip()`] was called
2620    /// after [`Self::new()`] or `start_replay()` or you'll lost events.
2621    #[cfg(feature = "overlapped-lists")]
2622    fn start_replay(&mut self, checkpoint: usize) {
2623        if checkpoint == 0 {
2624            self.write.append(&mut self.read);
2625            std::mem::swap(&mut self.read, &mut self.write);
2626        } else {
2627            let mut read = self.write.split_off(checkpoint);
2628            read.append(&mut self.read);
2629            self.read = read;
2630        }
2631    }
2632
2633    #[inline]
2634    fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
2635        self.read_string_impl(true)
2636    }
2637
2638    /// Consumes consequent [`Text`] and [`CData`] (both a referred below as a _text_)
2639    /// events, merge them into one string. If there are no such events, returns
2640    /// an empty string.
2641    ///
2642    /// If `allow_start` is `false`, then only text events are consumed, for other
2643    /// events an error is returned (see table below).
2644    ///
2645    /// If `allow_start` is `true`, then two or three events are expected:
2646    /// - [`DeEvent::Start`];
2647    /// - _(optional)_ [`DeEvent::Text`] which content is returned;
2648    /// - [`DeEvent::End`]. If text event was missed, an empty string is returned.
2649    ///
2650    /// Corresponding events are consumed.
2651    ///
2652    /// # Handling events
2653    ///
2654    /// The table below shows how events is handled by this method:
2655    ///
2656    /// |Event             |XML                        |Handling
2657    /// |------------------|---------------------------|----------------------------------------
2658    /// |[`DeEvent::Start`]|`<tag>...</tag>`           |if `allow_start == true`, result determined by the second table, otherwise emits [`UnexpectedStart("tag")`](DeError::UnexpectedStart)
2659    /// |[`DeEvent::End`]  |`</any-tag>`               |This is impossible situation, the method will panic if it happens
2660    /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged
2661    /// |[`DeEvent::Eof`]  |                           |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
2662    ///
2663    /// Second event, consumed if [`DeEvent::Start`] was received and `allow_start == true`:
2664    ///
2665    /// |Event             |XML                        |Handling
2666    /// |------------------|---------------------------|----------------------------------------------------------------------------------
2667    /// |[`DeEvent::Start`]|`<any-tag>...</any-tag>`   |Emits [`UnexpectedStart("any-tag")`](DeError::UnexpectedStart)
2668    /// |[`DeEvent::End`]  |`</tag>`                   |Returns an empty slice. The reader guarantee that tag will match the open one
2669    /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged, expects the `</tag>` after that
2670    /// |[`DeEvent::Eof`]  |                           |Emits [`InvalidXml(IllFormed(MissingEndTag))`](DeError::InvalidXml)
2671    ///
2672    /// [`Text`]: Event::Text
2673    /// [`CData`]: Event::CData
2674    fn read_string_impl(&mut self, allow_start: bool) -> Result<Cow<'de, str>, DeError> {
2675        match self.next()? {
2676            DeEvent::Text(e) => Ok(e.text),
2677            // allow one nested level
2678            DeEvent::Start(e) if allow_start => self.read_text(e.name()),
2679            DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
2680            // SAFETY: The reader is guaranteed that we don't have unmatched tags
2681            // If we here, then out deserializer has a bug
2682            DeEvent::End(e) => unreachable!("{:?}", e),
2683            DeEvent::Eof => Err(DeError::UnexpectedEof),
2684        }
2685    }
2686    /// Consumes one [`DeEvent::Text`] event and ensures that it is followed by the
2687    /// [`DeEvent::End`] event.
2688    ///
2689    /// # Parameters
2690    /// - `name`: name of a tag opened before reading text. The corresponding end tag
2691    ///   should present in input just after the text
2692    fn read_text(&mut self, name: QName) -> Result<Cow<'de, str>, DeError> {
2693        match self.next()? {
2694            DeEvent::Text(e) => match self.next()? {
2695                // The matching tag name is guaranteed by the reader
2696                DeEvent::End(_) => Ok(e.text),
2697                // SAFETY: Cannot be two consequent Text events, they would be merged into one
2698                DeEvent::Text(_) => unreachable!(),
2699                DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
2700                DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()),
2701            },
2702            // We can get End event in case of `<tag></tag>` or `<tag/>` input
2703            // Return empty text in that case
2704            // The matching tag name is guaranteed by the reader
2705            DeEvent::End(_) => Ok("".into()),
2706            DeEvent::Start(s) => Err(DeError::UnexpectedStart(s.name().as_ref().to_owned())),
2707            DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()),
2708        }
2709    }
2710
2711    /// Drops all events until event with [name](BytesEnd::name()) `name` won't be
2712    /// dropped. This method should be called after [`Self::next()`]
2713    #[cfg(feature = "overlapped-lists")]
2714    fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
2715        let mut depth = 0;
2716        loop {
2717            match self.read.pop_front() {
2718                Some(DeEvent::Start(e)) if e.name() == name => {
2719                    depth += 1;
2720                }
2721                Some(DeEvent::End(e)) if e.name() == name => {
2722                    if depth == 0 {
2723                        break;
2724                    }
2725                    depth -= 1;
2726                }
2727
2728                // Drop all other skipped events
2729                Some(_) => continue,
2730
2731                // If we do not have skipped events, use effective reading that will
2732                // not allocate memory for events
2733                None => {
2734                    // We should close all opened tags, because we could buffer
2735                    // Start events, but not the corresponding End events. So we
2736                    // keep reading events until we exit all nested tags.
2737                    // `read_to_end()` will return an error if an Eof was encountered
2738                    // preliminary (in case of malformed XML).
2739                    //
2740                    // <tag><tag></tag></tag>
2741                    // ^^^^^^^^^^             - buffered in `self.read`, when `self.read_to_end()` is called, depth = 2
2742                    //           ^^^^^^       - read by the first call of `self.reader.read_to_end()`
2743                    //                 ^^^^^^ - read by the second call of `self.reader.read_to_end()`
2744                    loop {
2745                        self.reader.read_to_end(name)?;
2746                        if depth == 0 {
2747                            break;
2748                        }
2749                        depth -= 1;
2750                    }
2751                    break;
2752                }
2753            }
2754        }
2755        Ok(())
2756    }
2757    #[cfg(not(feature = "overlapped-lists"))]
2758    fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
2759        // First one might be in self.peek
2760        match self.next()? {
2761            DeEvent::Start(e) => self.reader.read_to_end(e.name())?,
2762            DeEvent::End(e) if e.name() == name => return Ok(()),
2763            _ => (),
2764        }
2765        self.reader.read_to_end(name)
2766    }
2767}
2768
2769impl<'de> Deserializer<'de, SliceReader<'de>> {
2770    /// Create new deserializer that will borrow data from the specified string.
2771    ///
2772    /// Deserializer created with this method will not resolve custom entities.
2773    #[allow(clippy::should_implement_trait)]
2774    pub fn from_str(source: &'de str) -> Self {
2775        Self::from_str_with_resolver(source, PredefinedEntityResolver)
2776    }
2777}
2778
2779impl<'de, E> Deserializer<'de, SliceReader<'de>, E>
2780where
2781    E: EntityResolver,
2782{
2783    /// Create new deserializer that will borrow data from the specified string
2784    /// and use specified entity resolver.
2785    pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self {
2786        let mut reader = Reader::from_str(source);
2787        let config = reader.config_mut();
2788        config.expand_empty_elements = true;
2789
2790        Self::new(
2791            SliceReader {
2792                reader,
2793                start_trimmer: StartTrimmer::default(),
2794            },
2795            entity_resolver,
2796        )
2797    }
2798}
2799
2800impl<'de, R> Deserializer<'de, IoReader<R>>
2801where
2802    R: BufRead,
2803{
2804    /// Create new deserializer that will copy data from the specified reader
2805    /// into internal buffer.
2806    ///
2807    /// If you already have a string use [`Self::from_str`] instead, because it
2808    /// will borrow instead of copy. If you have `&[u8]` which is known to represent
2809    /// UTF-8, you can decode it first before using [`from_str`].
2810    ///
2811    /// Deserializer created with this method will not resolve custom entities.
2812    pub fn from_reader(reader: R) -> Self {
2813        Self::with_resolver(reader, PredefinedEntityResolver)
2814    }
2815}
2816
2817impl<'de, R, E> Deserializer<'de, IoReader<R>, E>
2818where
2819    R: BufRead,
2820    E: EntityResolver,
2821{
2822    /// Create new deserializer that will copy data from the specified reader
2823    /// into internal buffer and use specified entity resolver.
2824    ///
2825    /// If you already have a string use [`Self::from_str`] instead, because it
2826    /// will borrow instead of copy. If you have `&[u8]` which is known to represent
2827    /// UTF-8, you can decode it first before using [`from_str`].
2828    pub fn with_resolver(reader: R, entity_resolver: E) -> Self {
2829        let mut reader = Reader::from_reader(reader);
2830        let config = reader.config_mut();
2831        config.expand_empty_elements = true;
2832
2833        Self::new(
2834            IoReader {
2835                reader,
2836                start_trimmer: StartTrimmer::default(),
2837                buf: Vec::new(),
2838            },
2839            entity_resolver,
2840        )
2841    }
2842}
2843
2844impl<'de, 'a, R, E> de::Deserializer<'de> for &'a mut Deserializer<'de, R, E>
2845where
2846    R: XmlRead<'de>,
2847    E: EntityResolver,
2848{
2849    type Error = DeError;
2850
2851    deserialize_primitives!();
2852
2853    fn deserialize_struct<V>(
2854        self,
2855        _name: &'static str,
2856        fields: &'static [&'static str],
2857        visitor: V,
2858    ) -> Result<V::Value, DeError>
2859    where
2860        V: Visitor<'de>,
2861    {
2862        match self.next()? {
2863            DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self, e, fields)?),
2864            // SAFETY: The reader is guaranteed that we don't have unmatched tags
2865            // If we here, then out deserializer has a bug
2866            DeEvent::End(e) => unreachable!("{:?}", e),
2867            // Deserializer methods are only hints, if deserializer could not satisfy
2868            // request, it should return the data that it has. It is responsibility
2869            // of a Visitor to return an error if it does not understand the data
2870            DeEvent::Text(e) => match e.text {
2871                Cow::Borrowed(s) => visitor.visit_borrowed_str(s),
2872                Cow::Owned(s) => visitor.visit_string(s),
2873            },
2874            DeEvent::Eof => Err(DeError::UnexpectedEof),
2875        }
2876    }
2877
2878    /// Unit represented in XML as a `xs:element` or text/CDATA content.
2879    /// Any content inside `xs:element` is ignored and skipped.
2880    ///
2881    /// Produces unit struct from any of following inputs:
2882    /// - any `<tag ...>...</tag>`
2883    /// - any `<tag .../>`
2884    /// - any consequent text / CDATA content (can consist of several parts
2885    ///   delimited by comments and processing instructions)
2886    ///
2887    /// # Events handling
2888    ///
2889    /// |Event             |XML                        |Handling
2890    /// |------------------|---------------------------|-------------------------------------------
2891    /// |[`DeEvent::Start`]|`<tag>...</tag>`           |Calls `visitor.visit_unit()`, consumes all events up to and including corresponding `End` event
2892    /// |[`DeEvent::End`]  |`</tag>`                   |This is impossible situation, the method will panic if it happens
2893    /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Calls `visitor.visit_unit()`. The content is ignored
2894    /// |[`DeEvent::Eof`]  |                           |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
2895    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, DeError>
2896    where
2897        V: Visitor<'de>,
2898    {
2899        match self.next()? {
2900            DeEvent::Start(s) => {
2901                self.read_to_end(s.name())?;
2902                visitor.visit_unit()
2903            }
2904            DeEvent::Text(_) => visitor.visit_unit(),
2905            // SAFETY: The reader is guaranteed that we don't have unmatched tags
2906            // If we here, then out deserializer has a bug
2907            DeEvent::End(e) => unreachable!("{:?}", e),
2908            DeEvent::Eof => Err(DeError::UnexpectedEof),
2909        }
2910    }
2911
2912    /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
2913    /// with the same deserializer.
2914    fn deserialize_newtype_struct<V>(
2915        self,
2916        _name: &'static str,
2917        visitor: V,
2918    ) -> Result<V::Value, DeError>
2919    where
2920        V: Visitor<'de>,
2921    {
2922        visitor.visit_newtype_struct(self)
2923    }
2924
2925    fn deserialize_enum<V>(
2926        self,
2927        _name: &'static str,
2928        _variants: &'static [&'static str],
2929        visitor: V,
2930    ) -> Result<V::Value, DeError>
2931    where
2932        V: Visitor<'de>,
2933    {
2934        visitor.visit_enum(var::EnumAccess::new(self))
2935    }
2936
2937    fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, DeError>
2938    where
2939        V: Visitor<'de>,
2940    {
2941        visitor.visit_seq(self)
2942    }
2943
2944    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, DeError>
2945    where
2946        V: Visitor<'de>,
2947    {
2948        match self.peek()? {
2949            DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
2950            DeEvent::Eof => visitor.visit_none(),
2951            _ => visitor.visit_some(self),
2952        }
2953    }
2954
2955    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, DeError>
2956    where
2957        V: Visitor<'de>,
2958    {
2959        match self.peek()? {
2960            DeEvent::Text(_) => self.deserialize_str(visitor),
2961            _ => self.deserialize_map(visitor),
2962        }
2963    }
2964}
2965
2966/// An accessor to sequence elements forming a value for top-level sequence of XML
2967/// elements.
2968///
2969/// Technically, multiple top-level elements violates XML rule of only one top-level
2970/// element, but we consider this as several concatenated XML documents.
2971impl<'de, 'a, R, E> SeqAccess<'de> for &'a mut Deserializer<'de, R, E>
2972where
2973    R: XmlRead<'de>,
2974    E: EntityResolver,
2975{
2976    type Error = DeError;
2977
2978    fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error>
2979    where
2980        T: DeserializeSeed<'de>,
2981    {
2982        match self.peek()? {
2983            DeEvent::Eof => {
2984                // We need to consume event in order to self.is_empty() worked
2985                self.next()?;
2986                Ok(None)
2987            }
2988
2989            // Start(tag), End(tag), Text
2990            _ => seed.deserialize(&mut **self).map(Some),
2991        }
2992    }
2993}
2994
2995impl<'de, 'a, R, E> IntoDeserializer<'de, DeError> for &'a mut Deserializer<'de, R, E>
2996where
2997    R: XmlRead<'de>,
2998    E: EntityResolver,
2999{
3000    type Deserializer = Self;
3001
3002    #[inline]
3003    fn into_deserializer(self) -> Self {
3004        self
3005    }
3006}
3007
3008////////////////////////////////////////////////////////////////////////////////////////////////////
3009
/// Helper struct that keeps the state of the algorithm that converts raw
/// events to semi-trimmed events. The algorithm does not depend on the way
/// in which events are read.
struct StartTrimmer {
    /// If `true`, then leading whitespace will be removed from the next
    /// returned [`Event::Text`]. This field is set to `true` after each
    /// returned event except [`Event::Text`] and [`Event::CData`], so
    /// [`Event::Text`] events that are read right after them are not trimmed.
    trim_start: bool,
}
3020
3021impl StartTrimmer {
3022    /// Converts raw reader's event into a payload event.
3023    /// Returns `None`, if event should be skipped.
3024    #[inline(always)]
3025    fn trim<'a>(&mut self, event: Event<'a>) -> Option<PayloadEvent<'a>> {
3026        let (event, trim_next_event) = match event {
3027            Event::DocType(e) => (PayloadEvent::DocType(e), true),
3028            Event::Start(e) => (PayloadEvent::Start(e), true),
3029            Event::End(e) => (PayloadEvent::End(e), true),
3030            Event::Eof => (PayloadEvent::Eof, true),
3031
3032            // Do not trim next text event after Text or CDATA event
3033            Event::CData(e) => (PayloadEvent::CData(e), false),
3034            Event::Text(mut e) => {
3035                // If event is empty after trimming, skip it
3036                if self.trim_start && e.inplace_trim_start() {
3037                    return None;
3038                }
3039                (PayloadEvent::Text(e), false)
3040            }
3041
3042            _ => return None,
3043        };
3044        self.trim_start = trim_next_event;
3045        Some(event)
3046    }
3047}
3048
3049impl Default for StartTrimmer {
3050    #[inline]
3051    fn default() -> Self {
3052        Self { trim_start: true }
3053    }
3054}
3055
3056////////////////////////////////////////////////////////////////////////////////////////////////////
3057
3058/// Trait used by the deserializer for iterating over input. This is manually
3059/// "specialized" for iterating over `&[u8]`.
3060///
3061/// You do not need to implement this trait, it is needed to abstract from
3062/// [borrowing](SliceReader) and [copying](IoReader) data sources and reuse code in
3063/// deserializer
3064pub trait XmlRead<'i> {
3065    /// Return an input-borrowing event.
3066    fn next(&mut self) -> Result<PayloadEvent<'i>, DeError>;
3067
3068    /// Skips until end element is found. Unlike `next()` it will not allocate
3069    /// when it cannot satisfy the lifetime.
3070    fn read_to_end(&mut self, name: QName) -> Result<(), DeError>;
3071
3072    /// A copy of the reader's decoder used to decode strings.
3073    fn decoder(&self) -> Decoder;
3074}
3075
3076/// XML input source that reads from a std::io input stream.
3077///
3078/// You cannot create it, it is created automatically when you call
3079/// [`Deserializer::from_reader`]
3080pub struct IoReader<R: BufRead> {
3081    reader: Reader<R>,
3082    start_trimmer: StartTrimmer,
3083    buf: Vec<u8>,
3084}
3085
3086impl<R: BufRead> IoReader<R> {
3087    /// Returns the underlying XML reader.
3088    ///
3089    /// ```
3090    /// # use pretty_assertions::assert_eq;
3091    /// use serde::Deserialize;
3092    /// use std::io::Cursor;
3093    /// use quick_xml::de::Deserializer;
3094    /// use quick_xml::Reader;
3095    ///
3096    /// #[derive(Deserialize)]
3097    /// struct SomeStruct {
3098    ///     field1: String,
3099    ///     field2: String,
3100    /// }
3101    ///
3102    /// // Try to deserialize from broken XML
3103    /// let mut de = Deserializer::from_reader(Cursor::new(
3104    ///     "<SomeStruct><field1><field2></SomeStruct>"
3105    /// //   0                           ^= 28        ^= 41
3106    /// ));
3107    ///
3108    /// let err = SomeStruct::deserialize(&mut de);
3109    /// assert!(err.is_err());
3110    ///
3111    /// let reader: &Reader<Cursor<&str>> = de.get_ref().get_ref();
3112    ///
3113    /// assert_eq!(reader.error_position(), 28);
3114    /// assert_eq!(reader.buffer_position(), 41);
3115    /// ```
3116    pub const fn get_ref(&self) -> &Reader<R> {
3117        &self.reader
3118    }
3119}
3120
3121impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
3122    fn next(&mut self) -> Result<PayloadEvent<'static>, DeError> {
3123        loop {
3124            self.buf.clear();
3125
3126            let event = self.reader.read_event_into(&mut self.buf)?;
3127            if let Some(event) = self.start_trimmer.trim(event) {
3128                return Ok(event.into_owned());
3129            }
3130        }
3131    }
3132
3133    fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
3134        match self.reader.read_to_end_into(name, &mut self.buf) {
3135            Err(e) => Err(e.into()),
3136            Ok(_) => Ok(()),
3137        }
3138    }
3139
3140    fn decoder(&self) -> Decoder {
3141        self.reader.decoder()
3142    }
3143}
3144
3145/// XML input source that reads from a slice of bytes and can borrow from it.
3146///
3147/// You cannot create it, it is created automatically when you call
3148/// [`Deserializer::from_str`].
3149pub struct SliceReader<'de> {
3150    reader: Reader<&'de [u8]>,
3151    start_trimmer: StartTrimmer,
3152}
3153
3154impl<'de> SliceReader<'de> {
3155    /// Returns the underlying XML reader.
3156    ///
3157    /// ```
3158    /// # use pretty_assertions::assert_eq;
3159    /// use serde::Deserialize;
3160    /// use quick_xml::de::Deserializer;
3161    /// use quick_xml::Reader;
3162    ///
3163    /// #[derive(Deserialize)]
3164    /// struct SomeStruct {
3165    ///     field1: String,
3166    ///     field2: String,
3167    /// }
3168    ///
3169    /// // Try to deserialize from broken XML
3170    /// let mut de = Deserializer::from_str(
3171    ///     "<SomeStruct><field1><field2></SomeStruct>"
3172    /// //   0                           ^= 28        ^= 41
3173    /// );
3174    ///
3175    /// let err = SomeStruct::deserialize(&mut de);
3176    /// assert!(err.is_err());
3177    ///
3178    /// let reader: &Reader<&[u8]> = de.get_ref().get_ref();
3179    ///
3180    /// assert_eq!(reader.error_position(), 28);
3181    /// assert_eq!(reader.buffer_position(), 41);
3182    /// ```
3183    pub const fn get_ref(&self) -> &Reader<&'de [u8]> {
3184        &self.reader
3185    }
3186}
3187
3188impl<'de> XmlRead<'de> for SliceReader<'de> {
3189    fn next(&mut self) -> Result<PayloadEvent<'de>, DeError> {
3190        loop {
3191            let event = self.reader.read_event()?;
3192            if let Some(event) = self.start_trimmer.trim(event) {
3193                return Ok(event);
3194            }
3195        }
3196    }
3197
3198    fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
3199        match self.reader.read_to_end(name) {
3200            Err(e) => Err(e.into()),
3201            Ok(_) => Ok(()),
3202        }
3203    }
3204
3205    fn decoder(&self) -> Decoder {
3206        self.reader.decoder()
3207    }
3208}
3209
3210#[cfg(test)]
3211mod tests {
3212    use super::*;
3213    use crate::errors::IllFormedError;
3214    use pretty_assertions::assert_eq;
3215
3216    fn make_de<'de>(source: &'de str) -> Deserializer<'de, SliceReader<'de>> {
3217        dbg!(source);
3218        Deserializer::from_str(source)
3219    }
3220
3221    #[cfg(feature = "overlapped-lists")]
3222    mod skip {
3223        use super::*;
3224        use crate::de::DeEvent::*;
3225        use crate::events::BytesEnd;
3226        use pretty_assertions::assert_eq;
3227
3228        /// Checks that `peek()` and `read()` behaves correctly after `skip()`
3229        #[test]
3230        fn read_and_peek() {
3231            let mut de = make_de(
3232                r#"
3233                <root>
3234                    <inner>
3235                        text
3236                        <inner/>
3237                    </inner>
3238                    <next/>
3239                    <target/>
3240                </root>
3241                "#,
3242            );
3243
3244            // Initial conditions - both are empty
3245            assert_eq!(de.read, vec![]);
3246            assert_eq!(de.write, vec![]);
3247
3248            assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
3249            assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("inner")));
3250
3251            // Mark that start_replay() should begin replay from this point
3252            let checkpoint = de.skip_checkpoint();
3253            assert_eq!(checkpoint, 0);
3254
3255            // Should skip first <inner> tree
3256            de.skip().unwrap();
3257            assert_eq!(de.read, vec![]);
3258            assert_eq!(
3259                de.write,
3260                vec![
3261                    Start(BytesStart::new("inner")),
3262                    Text("text".into()),
3263                    Start(BytesStart::new("inner")),
3264                    End(BytesEnd::new("inner")),
3265                    End(BytesEnd::new("inner")),
3266                ]
3267            );
3268
3269            // Consume <next/>. Now unconsumed XML looks like:
3270            //
3271            //   <inner>
3272            //     text
3273            //     <inner/>
3274            //   </inner>
3275            //   <target/>
3276            // </root>
3277            assert_eq!(de.next().unwrap(), Start(BytesStart::new("next")));
3278            assert_eq!(de.next().unwrap(), End(BytesEnd::new("next")));
3279
3280            // We finish writing. Next call to `next()` should start replay that messages:
3281            //
3282            //   <inner>
3283            //     text
3284            //     <inner/>
3285            //   </inner>
3286            //
3287            // and after that stream that messages:
3288            //
3289            //   <target/>
3290            // </root>
3291            de.start_replay(checkpoint);
3292            assert_eq!(
3293                de.read,
3294                vec![
3295                    Start(BytesStart::new("inner")),
3296                    Text("text".into()),
3297                    Start(BytesStart::new("inner")),
3298                    End(BytesEnd::new("inner")),
3299                    End(BytesEnd::new("inner")),
3300                ]
3301            );
3302            assert_eq!(de.write, vec![]);
3303            assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
3304
3305            // Mark that start_replay() should begin replay from this point
3306            let checkpoint = de.skip_checkpoint();
3307            assert_eq!(checkpoint, 0);
3308
3309            // Skip `$text` node and consume <inner/> after it
3310            de.skip().unwrap();
3311            assert_eq!(
3312                de.read,
3313                vec![
3314                    Start(BytesStart::new("inner")),
3315                    End(BytesEnd::new("inner")),
3316                    End(BytesEnd::new("inner")),
3317                ]
3318            );
3319            assert_eq!(
3320                de.write,
3321                vec![
3322                    // This comment here to keep the same formatting of both arrays
3323                    // otherwise rustfmt suggest one-line it
3324                    Text("text".into()),
3325                ]
3326            );
3327
3328            assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
3329            assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
3330
3331            // We finish writing. Next call to `next()` should start replay messages:
3332            //
3333            //     text
3334            //   </inner>
3335            //
3336            // and after that stream that messages:
3337            //
3338            //   <target/>
3339            // </root>
3340            de.start_replay(checkpoint);
3341            assert_eq!(
3342                de.read,
3343                vec![
3344                    // This comment here to keep the same formatting as others
3345                    // otherwise rustfmt suggest one-line it
3346                    Text("text".into()),
3347                    End(BytesEnd::new("inner")),
3348                ]
3349            );
3350            assert_eq!(de.write, vec![]);
3351            assert_eq!(de.next().unwrap(), Text("text".into()));
3352            assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
3353            assert_eq!(de.next().unwrap(), Start(BytesStart::new("target")));
3354            assert_eq!(de.next().unwrap(), End(BytesEnd::new("target")));
3355            assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
3356            assert_eq!(de.next().unwrap(), Eof);
3357        }
3358
3359        /// Checks that `read_to_end()` behaves correctly after `skip()`
3360        #[test]
3361        fn read_to_end() {
3362            let mut de = make_de(
3363                r#"
3364                <root>
3365                    <skip>
3366                        text
3367                        <skip/>
3368                    </skip>
3369                    <target>
3370                        <target/>
3371                    </target>
3372                </root>
3373                "#,
3374            );
3375
3376            // Initial conditions - both are empty
3377            assert_eq!(de.read, vec![]);
3378            assert_eq!(de.write, vec![]);
3379
3380            assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
3381
3382            // Mark that start_replay() should begin replay from this point
3383            let checkpoint = de.skip_checkpoint();
3384            assert_eq!(checkpoint, 0);
3385
3386            // Skip the <skip> tree
3387            de.skip().unwrap();
3388            assert_eq!(de.read, vec![]);
3389            assert_eq!(
3390                de.write,
3391                vec![
3392                    Start(BytesStart::new("skip")),
3393                    Text("text".into()),
3394                    Start(BytesStart::new("skip")),
3395                    End(BytesEnd::new("skip")),
3396                    End(BytesEnd::new("skip")),
3397                ]
3398            );
3399
3400            // Drop all events that represents <target> tree. Now unconsumed XML looks like:
3401            //
3402            //   <skip>
3403            //     text
3404            //     <skip/>
3405            //   </skip>
3406            // </root>
3407            assert_eq!(de.next().unwrap(), Start(BytesStart::new("target")));
3408            de.read_to_end(QName(b"target")).unwrap();
3409            assert_eq!(de.read, vec![]);
3410            assert_eq!(
3411                de.write,
3412                vec![
3413                    Start(BytesStart::new("skip")),
3414                    Text("text".into()),
3415                    Start(BytesStart::new("skip")),
3416                    End(BytesEnd::new("skip")),
3417                    End(BytesEnd::new("skip")),
3418                ]
3419            );
3420
3421            // We finish writing. Next call to `next()` should start replay that messages:
3422            //
3423            //   <skip>
3424            //     text
3425            //     <skip/>
3426            //   </skip>
3427            //
3428            // and after that stream that messages:
3429            //
3430            // </root>
3431            de.start_replay(checkpoint);
3432            assert_eq!(
3433                de.read,
3434                vec![
3435                    Start(BytesStart::new("skip")),
3436                    Text("text".into()),
3437                    Start(BytesStart::new("skip")),
3438                    End(BytesEnd::new("skip")),
3439                    End(BytesEnd::new("skip")),
3440                ]
3441            );
3442            assert_eq!(de.write, vec![]);
3443
3444            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skip")));
3445            de.read_to_end(QName(b"skip")).unwrap();
3446
3447            assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
3448            assert_eq!(de.next().unwrap(), Eof);
3449        }
3450
3451        /// Checks that replay replayes only part of events
3452        /// Test for https://github.com/tafia/quick-xml/issues/435
3453        #[test]
3454        fn partial_replay() {
3455            let mut de = make_de(
3456                r#"
3457                <root>
3458                    <skipped-1/>
3459                    <skipped-2/>
3460                    <inner>
3461                        <skipped-3/>
3462                        <skipped-4/>
3463                        <target-2/>
3464                    </inner>
3465                    <target-1/>
3466                </root>
3467                "#,
3468            );
3469
3470            // Initial conditions - both are empty
3471            assert_eq!(de.read, vec![]);
3472            assert_eq!(de.write, vec![]);
3473
3474            assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
3475
3476            // start_replay() should start replay from this point
3477            let checkpoint1 = de.skip_checkpoint();
3478            assert_eq!(checkpoint1, 0);
3479
3480            // Should skip first and second <skipped-N/> elements
3481            de.skip().unwrap(); // skipped-1
3482            de.skip().unwrap(); // skipped-2
3483            assert_eq!(de.read, vec![]);
3484            assert_eq!(
3485                de.write,
3486                vec![
3487                    Start(BytesStart::new("skipped-1")),
3488                    End(BytesEnd::new("skipped-1")),
3489                    Start(BytesStart::new("skipped-2")),
3490                    End(BytesEnd::new("skipped-2")),
3491                ]
3492            );
3493
3494            ////////////////////////////////////////////////////////////////////////////////////////
3495
3496            assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
3497            assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("skipped-3")));
3498            assert_eq!(
3499                de.read,
3500                vec![
3501                    // This comment here to keep the same formatting of both arrays
3502                    // otherwise rustfmt suggest one-line it
3503                    Start(BytesStart::new("skipped-3")),
3504                ]
3505            );
3506            assert_eq!(
3507                de.write,
3508                vec![
3509                    Start(BytesStart::new("skipped-1")),
3510                    End(BytesEnd::new("skipped-1")),
3511                    Start(BytesStart::new("skipped-2")),
3512                    End(BytesEnd::new("skipped-2")),
3513                ]
3514            );
3515
3516            // start_replay() should start replay from this point
3517            let checkpoint2 = de.skip_checkpoint();
3518            assert_eq!(checkpoint2, 4);
3519
3520            // Should skip third and forth <skipped-N/> elements
3521            de.skip().unwrap(); // skipped-3
3522            de.skip().unwrap(); // skipped-4
3523            assert_eq!(de.read, vec![]);
3524            assert_eq!(
3525                de.write,
3526                vec![
3527                    // checkpoint 1
3528                    Start(BytesStart::new("skipped-1")),
3529                    End(BytesEnd::new("skipped-1")),
3530                    Start(BytesStart::new("skipped-2")),
3531                    End(BytesEnd::new("skipped-2")),
3532                    // checkpoint 2
3533                    Start(BytesStart::new("skipped-3")),
3534                    End(BytesEnd::new("skipped-3")),
3535                    Start(BytesStart::new("skipped-4")),
3536                    End(BytesEnd::new("skipped-4")),
3537                ]
3538            );
3539            assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-2")));
3540            assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-2")));
3541            assert_eq!(de.peek().unwrap(), &End(BytesEnd::new("inner")));
3542            assert_eq!(
3543                de.read,
3544                vec![
3545                    // This comment here to keep the same formatting of both arrays
3546                    // otherwise rustfmt suggest one-line it
3547                    End(BytesEnd::new("inner")),
3548                ]
3549            );
3550            assert_eq!(
3551                de.write,
3552                vec![
3553                    // checkpoint 1
3554                    Start(BytesStart::new("skipped-1")),
3555                    End(BytesEnd::new("skipped-1")),
3556                    Start(BytesStart::new("skipped-2")),
3557                    End(BytesEnd::new("skipped-2")),
3558                    // checkpoint 2
3559                    Start(BytesStart::new("skipped-3")),
3560                    End(BytesEnd::new("skipped-3")),
3561                    Start(BytesStart::new("skipped-4")),
3562                    End(BytesEnd::new("skipped-4")),
3563                ]
3564            );
3565
3566            // Start replay events from checkpoint 2
3567            de.start_replay(checkpoint2);
3568            assert_eq!(
3569                de.read,
3570                vec![
3571                    Start(BytesStart::new("skipped-3")),
3572                    End(BytesEnd::new("skipped-3")),
3573                    Start(BytesStart::new("skipped-4")),
3574                    End(BytesEnd::new("skipped-4")),
3575                    End(BytesEnd::new("inner")),
3576                ]
3577            );
3578            assert_eq!(
3579                de.write,
3580                vec![
3581                    Start(BytesStart::new("skipped-1")),
3582                    End(BytesEnd::new("skipped-1")),
3583                    Start(BytesStart::new("skipped-2")),
3584                    End(BytesEnd::new("skipped-2")),
3585                ]
3586            );
3587
3588            // Replayed events
3589            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-3")));
3590            assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-3")));
3591            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-4")));
3592            assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-4")));
3593
3594            assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
3595            assert_eq!(de.read, vec![]);
3596            assert_eq!(
3597                de.write,
3598                vec![
3599                    Start(BytesStart::new("skipped-1")),
3600                    End(BytesEnd::new("skipped-1")),
3601                    Start(BytesStart::new("skipped-2")),
3602                    End(BytesEnd::new("skipped-2")),
3603                ]
3604            );
3605
3606            ////////////////////////////////////////////////////////////////////////////////////////
3607
3608            // New events
3609            assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-1")));
3610            assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-1")));
3611
3612            assert_eq!(de.read, vec![]);
3613            assert_eq!(
3614                de.write,
3615                vec![
3616                    Start(BytesStart::new("skipped-1")),
3617                    End(BytesEnd::new("skipped-1")),
3618                    Start(BytesStart::new("skipped-2")),
3619                    End(BytesEnd::new("skipped-2")),
3620                ]
3621            );
3622
3623            // Start replay events from checkpoint 1
3624            de.start_replay(checkpoint1);
3625            assert_eq!(
3626                de.read,
3627                vec![
3628                    Start(BytesStart::new("skipped-1")),
3629                    End(BytesEnd::new("skipped-1")),
3630                    Start(BytesStart::new("skipped-2")),
3631                    End(BytesEnd::new("skipped-2")),
3632                ]
3633            );
3634            assert_eq!(de.write, vec![]);
3635
3636            // Replayed events
3637            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-1")));
3638            assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-1")));
3639            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-2")));
3640            assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-2")));
3641
3642            assert_eq!(de.read, vec![]);
3643            assert_eq!(de.write, vec![]);
3644
3645            // New events
3646            assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
3647            assert_eq!(de.next().unwrap(), Eof);
3648        }
3649
3650        /// Checks that limiting buffer size works correctly
3651        #[test]
3652        fn limit() {
3653            use serde::Deserialize;
3654
3655            #[derive(Debug, Deserialize)]
3656            #[allow(unused)]
3657            struct List {
3658                item: Vec<()>,
3659            }
3660
3661            let mut de = make_de(
3662                r#"
3663                <any-name>
3664                    <item/>
3665                    <another-item>
3666                        <some-element>with text</some-element>
3667                        <yet-another-element/>
3668                    </another-item>
3669                    <item/>
3670                    <item/>
3671                </any-name>
3672                "#,
3673            );
3674            de.event_buffer_size(NonZeroUsize::new(3));
3675
3676            match List::deserialize(&mut de) {
3677                Err(DeError::TooManyEvents(count)) => assert_eq!(count.get(), 3),
3678                e => panic!("Expected `Err(TooManyEvents(3))`, but got `{:?}`", e),
3679            }
3680        }
3681
3682        /// Without handling Eof in `skip` this test failed with memory allocation
3683        #[test]
3684        fn invalid_xml() {
3685            use crate::de::DeEvent::*;
3686
3687            let mut de = make_de("<root>");
3688
3689            // Cache all events
3690            let checkpoint = de.skip_checkpoint();
3691            de.skip().unwrap();
3692            de.start_replay(checkpoint);
3693            assert_eq!(de.read, vec![Start(BytesStart::new("root")), Eof]);
3694        }
3695    }
3696
3697    mod read_to_end {
3698        use super::*;
3699        use crate::de::DeEvent::*;
3700        use pretty_assertions::assert_eq;
3701
3702        #[test]
3703        fn complex() {
3704            let mut de = make_de(
3705                r#"
3706                <root>
3707                    <tag a="1"><tag>text</tag>content</tag>
3708                    <tag a="2"><![CDATA[cdata content]]></tag>
3709                    <self-closed/>
3710                </root>
3711                "#,
3712            );
3713
3714            assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
3715
3716            assert_eq!(
3717                de.next().unwrap(),
3718                Start(BytesStart::from_content(r#"tag a="1""#, 3))
3719            );
3720            assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ());
3721
3722            assert_eq!(
3723                de.next().unwrap(),
3724                Start(BytesStart::from_content(r#"tag a="2""#, 3))
3725            );
3726            assert_eq!(de.next().unwrap(), Text("cdata content".into()));
3727            assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag")));
3728
3729            assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed")));
3730            assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ());
3731
3732            assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
3733            assert_eq!(de.next().unwrap(), Eof);
3734        }
3735
3736        #[test]
3737        fn invalid_xml1() {
3738            let mut de = make_de("<tag><tag></tag>");
3739
3740            assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag")));
3741            assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("tag")));
3742
3743            match de.read_to_end(QName(b"tag")) {
3744                Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
3745                    assert_eq!(cause, IllFormedError::MissingEndTag("tag".into()))
3746                }
3747                x => panic!(
3748                    "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
3749                    x
3750                ),
3751            }
3752            assert_eq!(de.next().unwrap(), Eof);
3753        }
3754
3755        #[test]
3756        fn invalid_xml2() {
3757            let mut de = make_de("<tag><![CDATA[]]><tag></tag>");
3758
3759            assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag")));
3760            assert_eq!(de.peek().unwrap(), &Text("".into()));
3761
3762            match de.read_to_end(QName(b"tag")) {
3763                Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
3764                    assert_eq!(cause, IllFormedError::MissingEndTag("tag".into()))
3765                }
3766                x => panic!(
3767                    "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
3768                    x
3769                ),
3770            }
3771            assert_eq!(de.next().unwrap(), Eof);
3772        }
3773    }
3774
3775    #[test]
3776    fn borrowing_reader_parity() {
3777        let s = r#"
3778            <item name="hello" source="world.rs">Some text</item>
3779            <item2/>
3780            <item3 value="world" />
3781        "#;
3782
3783        let mut reader1 = IoReader {
3784            reader: Reader::from_reader(s.as_bytes()),
3785            start_trimmer: StartTrimmer::default(),
3786            buf: Vec::new(),
3787        };
3788        let mut reader2 = SliceReader {
3789            reader: Reader::from_str(s),
3790            start_trimmer: StartTrimmer::default(),
3791        };
3792
3793        loop {
3794            let event1 = reader1.next().unwrap();
3795            let event2 = reader2.next().unwrap();
3796
3797            if let (PayloadEvent::Eof, PayloadEvent::Eof) = (&event1, &event2) {
3798                break;
3799            }
3800
3801            assert_eq!(event1, event2);
3802        }
3803    }
3804
3805    #[test]
3806    fn borrowing_reader_events() {
3807        let s = r#"
3808            <item name="hello" source="world.rs">Some text</item>
3809            <item2></item2>
3810            <item3/>
3811            <item4 value="world" />
3812        "#;
3813
3814        let mut reader = SliceReader {
3815            reader: Reader::from_str(s),
3816            start_trimmer: StartTrimmer::default(),
3817        };
3818
3819        let config = reader.reader.config_mut();
3820        config.expand_empty_elements = true;
3821
3822        let mut events = Vec::new();
3823
3824        loop {
3825            let event = reader.next().unwrap();
3826            if let PayloadEvent::Eof = event {
3827                break;
3828            }
3829            events.push(event);
3830        }
3831
3832        use crate::de::PayloadEvent::*;
3833
3834        assert_eq!(
3835            events,
3836            vec![
3837                Start(BytesStart::from_content(
3838                    r#"item name="hello" source="world.rs""#,
3839                    4
3840                )),
3841                Text(BytesText::from_escaped("Some text")),
3842                End(BytesEnd::new("item")),
3843                Start(BytesStart::from_content("item2", 5)),
3844                End(BytesEnd::new("item2")),
3845                Start(BytesStart::from_content("item3", 5)),
3846                End(BytesEnd::new("item3")),
3847                Start(BytesStart::from_content(r#"item4 value="world" "#, 5)),
3848                End(BytesEnd::new("item4")),
3849            ]
3850        )
3851    }
3852
3853    /// Ensures, that [`Deserializer::read_string()`] never can get an `End` event,
3854    /// because parser reports error early
3855    #[test]
3856    fn read_string() {
3857        match from_str::<String>(r#"</root>"#) {
3858            Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
3859                assert_eq!(cause, IllFormedError::UnmatchedEndTag("root".into()));
3860            }
3861            x => panic!(
3862                "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
3863                x
3864            ),
3865        }
3866
3867        let s: String = from_str(r#"<root></root>"#).unwrap();
3868        assert_eq!(s, "");
3869
3870        match from_str::<String>(r#"<root></other>"#) {
3871            Err(DeError::InvalidXml(Error::IllFormed(cause))) => assert_eq!(
3872                cause,
3873                IllFormedError::MismatchedEndTag {
3874                    expected: "root".into(),
3875                    found: "other".into(),
3876                }
3877            ),
3878            x => panic!("Expected `Err(InvalidXml(IllFormed(_))`, but got `{:?}`", x),
3879        }
3880    }
3881
3882    /// Tests for https://github.com/tafia/quick-xml/issues/474.
3883    ///
3884    /// That tests ensures that comments and processed instructions is ignored
3885    /// and can split one logical string in pieces.
3886    mod merge_text {
3887        use super::*;
3888        use pretty_assertions::assert_eq;
3889
3890        #[test]
3891        fn text() {
3892            let mut de = make_de("text");
3893            assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
3894        }
3895
3896        #[test]
3897        fn cdata() {
3898            let mut de = make_de("<![CDATA[cdata]]>");
3899            assert_eq!(de.next().unwrap(), DeEvent::Text("cdata".into()));
3900        }
3901
3902        #[test]
3903        fn text_and_cdata() {
3904            let mut de = make_de("text and <![CDATA[cdata]]>");
3905            assert_eq!(de.next().unwrap(), DeEvent::Text("text and cdata".into()));
3906        }
3907
3908        #[test]
3909        fn text_and_empty_cdata() {
3910            let mut de = make_de("text and <![CDATA[]]>");
3911            assert_eq!(de.next().unwrap(), DeEvent::Text("text and ".into()));
3912        }
3913
3914        #[test]
3915        fn cdata_and_text() {
3916            let mut de = make_de("<![CDATA[cdata]]> and text");
3917            assert_eq!(de.next().unwrap(), DeEvent::Text("cdata and text".into()));
3918        }
3919
3920        #[test]
3921        fn empty_cdata_and_text() {
3922            let mut de = make_de("<![CDATA[]]> and text");
3923            assert_eq!(de.next().unwrap(), DeEvent::Text(" and text".into()));
3924        }
3925
3926        #[test]
3927        fn cdata_and_cdata() {
3928            let mut de = make_de(
3929                "\
3930                    <![CDATA[cdata]]]]>\
3931                    <![CDATA[>cdata]]>\
3932                ",
3933            );
3934            assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
3935        }
3936
3937        mod comment_between {
3938            use super::*;
3939            use pretty_assertions::assert_eq;
3940
3941            #[test]
3942            fn text() {
3943                let mut de = make_de(
3944                    "\
3945                        text \
3946                        <!--comment 1--><!--comment 2--> \
3947                        text\
3948                    ",
3949                );
3950                assert_eq!(de.next().unwrap(), DeEvent::Text("text  text".into()));
3951            }
3952
3953            #[test]
3954            fn cdata() {
3955                let mut de = make_de(
3956                    "\
3957                        <![CDATA[cdata]]]]>\
3958                        <!--comment 1--><!--comment 2-->\
3959                        <![CDATA[>cdata]]>\
3960                    ",
3961                );
3962                assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
3963            }
3964
3965            #[test]
3966            fn text_and_cdata() {
3967                let mut de = make_de(
3968                    "\
3969                        text \
3970                        <!--comment 1--><!--comment 2-->\
3971                        <![CDATA[ cdata]]>\
3972                    ",
3973                );
3974                assert_eq!(de.next().unwrap(), DeEvent::Text("text  cdata".into()));
3975            }
3976
3977            #[test]
3978            fn text_and_empty_cdata() {
3979                let mut de = make_de(
3980                    "\
3981                        text \
3982                        <!--comment 1--><!--comment 2-->\
3983                        <![CDATA[]]>\
3984                    ",
3985                );
3986                assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into()));
3987            }
3988
3989            #[test]
3990            fn cdata_and_text() {
3991                let mut de = make_de(
3992                    "\
3993                        <![CDATA[cdata ]]>\
3994                        <!--comment 1--><!--comment 2--> \
3995                        text \
3996                    ",
3997                );
3998                assert_eq!(de.next().unwrap(), DeEvent::Text("cdata  text".into()));
3999            }
4000
4001            #[test]
4002            fn empty_cdata_and_text() {
4003                let mut de = make_de(
4004                    "\
4005                        <![CDATA[]]>\
4006                        <!--comment 1--><!--comment 2--> \
4007                        text \
4008                    ",
4009                );
4010                assert_eq!(de.next().unwrap(), DeEvent::Text(" text".into()));
4011            }
4012
4013            #[test]
4014            fn cdata_and_cdata() {
4015                let mut de = make_de(
4016                    "\
4017                        <![CDATA[cdata]]]>\
4018                        <!--comment 1--><!--comment 2-->\
4019                        <![CDATA[]>cdata]]>\
4020                    ",
4021                );
4022                assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4023            }
4024        }
4025
4026        mod pi_between {
4027            use super::*;
4028            use pretty_assertions::assert_eq;
4029
4030            #[test]
4031            fn text() {
4032                let mut de = make_de(
4033                    "\
4034                        text \
4035                        <?pi 1?><?pi 2?> \
4036                        text\
4037                    ",
4038                );
4039                assert_eq!(de.next().unwrap(), DeEvent::Text("text  text".into()));
4040            }
4041
4042            #[test]
4043            fn cdata() {
4044                let mut de = make_de(
4045                    "\
4046                        <![CDATA[cdata]]]]>\
4047                        <?pi 1?><?pi 2?>\
4048                        <![CDATA[>cdata]]>\
4049                    ",
4050                );
4051                assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4052            }
4053
4054            #[test]
4055            fn text_and_cdata() {
4056                let mut de = make_de(
4057                    "\
4058                        text \
4059                        <?pi 1?><?pi 2?>\
4060                        <![CDATA[ cdata]]>\
4061                    ",
4062                );
4063                assert_eq!(de.next().unwrap(), DeEvent::Text("text  cdata".into()));
4064            }
4065
4066            #[test]
4067            fn text_and_empty_cdata() {
4068                let mut de = make_de(
4069                    "\
4070                        text \
4071                        <?pi 1?><?pi 2?>\
4072                        <![CDATA[]]>\
4073                    ",
4074                );
4075                assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into()));
4076            }
4077
4078            #[test]
4079            fn cdata_and_text() {
4080                let mut de = make_de(
4081                    "\
4082                        <![CDATA[cdata ]]>\
4083                        <?pi 1?><?pi 2?> \
4084                        text \
4085                    ",
4086                );
4087                assert_eq!(de.next().unwrap(), DeEvent::Text("cdata  text".into()));
4088            }
4089
4090            #[test]
4091            fn empty_cdata_and_text() {
4092                let mut de = make_de(
4093                    "\
4094                        <![CDATA[]]>\
4095                        <?pi 1?><?pi 2?> \
4096                        text \
4097                    ",
4098                );
4099                assert_eq!(de.next().unwrap(), DeEvent::Text(" text".into()));
4100            }
4101
4102            #[test]
4103            fn cdata_and_cdata() {
4104                let mut de = make_de(
4105                    "\
4106                        <![CDATA[cdata]]]>\
4107                        <?pi 1?><?pi 2?>\
4108                        <![CDATA[]>cdata]]>\
4109                    ",
4110                );
4111                assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4112            }
4113        }
4114    }
4115
4116    /// Tests for https://github.com/tafia/quick-xml/issues/474.
4117    ///
    /// These tests ensure that any combination of payload data is processed
    /// as expected.
4120    mod triples {
4121        use super::*;
4122        use pretty_assertions::assert_eq;
4123
4124        mod start {
4125            use super::*;
4126
4127            /// <tag1><tag2>...
4128            mod start {
4129                use super::*;
4130                use pretty_assertions::assert_eq;
4131
4132                #[test]
4133                fn start() {
4134                    let mut de = make_de("<tag1><tag2><tag3>");
4135                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4136                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4137                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag3")));
4138                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4139                }
4140
4141                /// Not matching end tag will result to error
4142                #[test]
4143                fn end() {
4144                    let mut de = make_de("<tag1><tag2></tag2>");
4145                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4146                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4147                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag2")));
4148                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4149                }
4150
4151                #[test]
4152                fn text() {
4153                    let mut de = make_de("<tag1><tag2> text ");
4154                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4155                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4156                    // Text is trimmed from both sides
4157                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4158                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4159                }
4160
4161                #[test]
4162                fn cdata() {
4163                    let mut de = make_de("<tag1><tag2><![CDATA[ cdata ]]>");
4164                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4165                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4166                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4167                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4168                }
4169
4170                #[test]
4171                fn eof() {
4172                    let mut de = make_de("<tag1><tag2>");
4173                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4174                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4175                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4176                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4177                }
4178            }
4179
4180            /// <tag></tag>...
4181            mod end {
4182                use super::*;
4183                use pretty_assertions::assert_eq;
4184
4185                #[test]
4186                fn start() {
4187                    let mut de = make_de("<tag></tag><tag2>");
4188                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4189                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4190                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4191                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4192                }
4193
4194                #[test]
4195                fn end() {
4196                    let mut de = make_de("<tag></tag></tag2>");
4197                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4198                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4199                    match de.next() {
4200                        Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4201                            assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag2".into()));
4202                        }
4203                        x => panic!(
4204                            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4205                            x
4206                        ),
4207                    }
4208                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4209                }
4210
4211                #[test]
4212                fn text() {
4213                    let mut de = make_de("<tag></tag> text ");
4214                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4215                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4216                    // Text is trimmed from both sides
4217                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4218                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4219                }
4220
4221                #[test]
4222                fn cdata() {
4223                    let mut de = make_de("<tag></tag><![CDATA[ cdata ]]>");
4224                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4225                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4226                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4227                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4228                }
4229
4230                #[test]
4231                fn eof() {
4232                    let mut de = make_de("<tag></tag>");
4233                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4234                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4235                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4236                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4237                }
4238            }
4239
4240            /// <tag> text ...
4241            mod text {
4242                use super::*;
4243                use pretty_assertions::assert_eq;
4244
4245                #[test]
4246                fn start() {
4247                    let mut de = make_de("<tag> text <tag2>");
4248                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4249                    // Text is trimmed from both sides
4250                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4251                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4252                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4253                }
4254
4255                #[test]
4256                fn end() {
4257                    let mut de = make_de("<tag> text </tag>");
4258                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4259                    // Text is trimmed from both sides
4260                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4261                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4262                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4263                }
4264
4265                // start::text::text has no difference from start::text
4266
4267                #[test]
4268                fn cdata() {
4269                    let mut de = make_de("<tag> text <![CDATA[ cdata ]]>");
4270                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4271                    // Text is trimmed from the start
4272                    assert_eq!(de.next().unwrap(), DeEvent::Text("text  cdata ".into()));
4273                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4274                }
4275
4276                #[test]
4277                fn eof() {
4278                    let mut de = make_de("<tag> text ");
4279                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4280                    // Text is trimmed from both sides
4281                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4282                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4283                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4284                }
4285            }
4286
4287            /// <tag><![CDATA[ cdata ]]>...
4288            mod cdata {
4289                use super::*;
4290                use pretty_assertions::assert_eq;
4291
4292                #[test]
4293                fn start() {
4294                    let mut de = make_de("<tag><![CDATA[ cdata ]]><tag2>");
4295                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4296                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4297                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4298                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4299                }
4300
4301                #[test]
4302                fn end() {
4303                    let mut de = make_de("<tag><![CDATA[ cdata ]]></tag>");
4304                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4305                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4306                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4307                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4308                }
4309
4310                #[test]
4311                fn text() {
4312                    let mut de = make_de("<tag><![CDATA[ cdata ]]> text ");
4313                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4314                    // Text is trimmed from the end
4315                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  text".into()));
4316                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4317                }
4318
4319                #[test]
4320                fn cdata() {
4321                    let mut de = make_de("<tag><![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4322                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4323                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  cdata2 ".into()));
4324                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4325                }
4326
4327                #[test]
4328                fn eof() {
4329                    let mut de = make_de("<tag><![CDATA[ cdata ]]>");
4330                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4331                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4332                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4333                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4334                }
4335            }
4336        }
4337
4338        /// Start from End event will always generate an error
4339        #[test]
4340        fn end() {
4341            let mut de = make_de("</tag>");
4342            match de.next() {
4343                Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4344                    assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4345                }
4346                x => panic!(
4347                    "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4348                    x
4349                ),
4350            }
4351            assert_eq!(de.next().unwrap(), DeEvent::Eof);
4352        }
4353
4354        mod text {
4355            use super::*;
4356            use pretty_assertions::assert_eq;
4357
4358            mod start {
4359                use super::*;
4360                use pretty_assertions::assert_eq;
4361
4362                #[test]
4363                fn start() {
4364                    let mut de = make_de(" text <tag1><tag2>");
4365                    // Text is trimmed from both sides
4366                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4367                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4368                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4369                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4370                }
4371
4372                /// Not matching end tag will result in error
4373                #[test]
4374                fn end() {
4375                    let mut de = make_de(" text <tag></tag>");
4376                    // Text is trimmed from both sides
4377                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4378                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4379                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4380                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4381                }
4382
4383                #[test]
4384                fn text() {
4385                    let mut de = make_de(" text <tag> text2 ");
4386                    // Text is trimmed from both sides
4387                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4388                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4389                    // Text is trimmed from both sides
4390                    assert_eq!(de.next().unwrap(), DeEvent::Text("text2".into()));
4391                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4392                }
4393
4394                #[test]
4395                fn cdata() {
4396                    let mut de = make_de(" text <tag><![CDATA[ cdata ]]>");
4397                    // Text is trimmed from both sides
4398                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4399                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4400                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4401                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4402                }
4403
4404                #[test]
4405                fn eof() {
4406                    // Text is trimmed from both sides
4407                    let mut de = make_de(" text <tag>");
4408                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4409                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4410                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4411                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4412                }
4413            }
4414
4415            /// End event without corresponding start event will always generate an error
4416            #[test]
4417            fn end() {
4418                let mut de = make_de(" text </tag>");
4419                // Text is trimmed from both sides
4420                assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4421                match de.next() {
4422                    Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4423                        assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4424                    }
4425                    x => panic!(
4426                        "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4427                        x
4428                    ),
4429                }
4430                assert_eq!(de.next().unwrap(), DeEvent::Eof);
4431            }
4432
4433            // text::text::something is equivalent to text::something
4434
4435            mod cdata {
4436                use super::*;
4437                use pretty_assertions::assert_eq;
4438
4439                #[test]
4440                fn start() {
4441                    let mut de = make_de(" text <![CDATA[ cdata ]]><tag>");
4442                    // Text is trimmed from the start
4443                    assert_eq!(de.next().unwrap(), DeEvent::Text("text  cdata ".into()));
4444                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4445                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4446                }
4447
4448                #[test]
4449                fn end() {
4450                    let mut de = make_de(" text <![CDATA[ cdata ]]></tag>");
4451                    // Text is trimmed from the start
4452                    assert_eq!(de.next().unwrap(), DeEvent::Text("text  cdata ".into()));
4453                    match de.next() {
4454                        Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4455                            assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4456                        }
4457                        x => panic!(
4458                            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4459                            x
4460                        ),
4461                    }
4462                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4463                }
4464
4465                #[test]
4466                fn text() {
4467                    let mut de = make_de(" text <![CDATA[ cdata ]]> text2 ");
4468                    // Text is trimmed from the start and from the end
4469                    assert_eq!(
4470                        de.next().unwrap(),
4471                        DeEvent::Text("text  cdata  text2".into())
4472                    );
4473                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4474                }
4475
4476                #[test]
4477                fn cdata() {
4478                    let mut de = make_de(" text <![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4479                    // Text is trimmed from the start
4480                    assert_eq!(
4481                        de.next().unwrap(),
4482                        DeEvent::Text("text  cdata  cdata2 ".into())
4483                    );
4484                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4485                }
4486
4487                #[test]
4488                fn eof() {
4489                    let mut de = make_de(" text <![CDATA[ cdata ]]>");
4490                    // Text is trimmed from the start
4491                    assert_eq!(de.next().unwrap(), DeEvent::Text("text  cdata ".into()));
4492                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4493                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4494                }
4495            }
4496        }
4497
4498        mod cdata {
4499            use super::*;
4500            use pretty_assertions::assert_eq;
4501
4502            mod start {
4503                use super::*;
4504                use pretty_assertions::assert_eq;
4505
4506                #[test]
4507                fn start() {
4508                    let mut de = make_de("<![CDATA[ cdata ]]><tag1><tag2>");
4509                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4510                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4511                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4512                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4513                }
4514
4515                /// Not matching end tag will result in error
4516                #[test]
4517                fn end() {
4518                    let mut de = make_de("<![CDATA[ cdata ]]><tag></tag>");
4519                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4520                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4521                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4522                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4523                }
4524
4525                #[test]
4526                fn text() {
4527                    let mut de = make_de("<![CDATA[ cdata ]]><tag> text ");
4528                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4529                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4530                    // Text is trimmed from both sides
4531                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4532                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4533                }
4534
4535                #[test]
4536                fn cdata() {
4537                    let mut de = make_de("<![CDATA[ cdata ]]><tag><![CDATA[ cdata2 ]]>");
4538                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4539                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4540                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata2 ".into()));
4541                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4542                }
4543
4544                #[test]
4545                fn eof() {
4546                    let mut de = make_de("<![CDATA[ cdata ]]><tag>");
4547                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4548                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4549                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4550                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4551                }
4552            }
4553
4554            /// End event without corresponding start event will always generate an error
4555            #[test]
4556            fn end() {
4557                let mut de = make_de("<![CDATA[ cdata ]]></tag>");
4558                assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4559                match de.next() {
4560                    Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4561                        assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4562                    }
4563                    x => panic!(
4564                        "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4565                        x
4566                    ),
4567                }
4568                assert_eq!(de.next().unwrap(), DeEvent::Eof);
4569            }
4570
4571            mod text {
4572                use super::*;
4573                use pretty_assertions::assert_eq;
4574
4575                #[test]
4576                fn start() {
4577                    let mut de = make_de("<![CDATA[ cdata ]]> text <tag>");
4578                    // Text is trimmed from the end
4579                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  text".into()));
4580                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4581                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4582                }
4583
4584                #[test]
4585                fn end() {
4586                    let mut de = make_de("<![CDATA[ cdata ]]> text </tag>");
4587                    // Text is trimmed from the end
4588                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  text".into()));
4589                    match de.next() {
4590                        Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4591                            assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4592                        }
4593                        x => panic!(
4594                            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4595                            x
4596                        ),
4597                    }
4598                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4599                }
4600
4601                // cdata::text::text is equivalent to cdata::text
4602
4603                #[test]
4604                fn cdata() {
4605                    let mut de = make_de("<![CDATA[ cdata ]]> text <![CDATA[ cdata2 ]]>");
4606                    assert_eq!(
4607                        de.next().unwrap(),
4608                        DeEvent::Text(" cdata  text  cdata2 ".into())
4609                    );
4610                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4611                }
4612
4613                #[test]
4614                fn eof() {
4615                    let mut de = make_de("<![CDATA[ cdata ]]> text ");
4616                    // Text is trimmed from the end
4617                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  text".into()));
4618                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4619                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4620                }
4621            }
4622
4623            mod cdata {
4624                use super::*;
4625                use pretty_assertions::assert_eq;
4626
4627                #[test]
4628                fn start() {
4629                    let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><tag>");
4630                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  cdata2 ".into()));
4631                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4632                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4633                }
4634
4635                #[test]
4636                fn end() {
4637                    let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]></tag>");
4638                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  cdata2 ".into()));
4639                    match de.next() {
4640                        Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4641                            assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4642                        }
4643                        x => panic!(
4644                            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4645                            x
4646                        ),
4647                    }
4648                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4649                }
4650
4651                #[test]
4652                fn text() {
4653                    let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]> text ");
4654                    // Text is trimmed from the end
4655                    assert_eq!(
4656                        de.next().unwrap(),
4657                        DeEvent::Text(" cdata  cdata2  text".into())
4658                    );
4659                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4660                }
4661
4662                #[test]
4663                fn cdata() {
4664                    let mut de =
4665                        make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><![CDATA[ cdata3 ]]>");
4666                    assert_eq!(
4667                        de.next().unwrap(),
4668                        DeEvent::Text(" cdata  cdata2  cdata3 ".into())
4669                    );
4670                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4671                }
4672
4673                #[test]
4674                fn eof() {
4675                    let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4676                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  cdata2 ".into()));
4677                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4678                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4679                }
4680            }
4681        }
4682    }
4683}