quick_xml/de/
mod.rs

1//! Serde `Deserializer` module.
2//!
3//! Due to the complexity of the XML standard and the fact that Serde was developed
4//! with JSON in mind, not all Serde concepts apply smoothly to XML. This leads to
5//! the fact that some XML concepts are inexpressible in terms of Serde derives
6//! and may require manual deserialization.
7//!
8//! The most notable restriction is the ability to distinguish between _elements_
9//! and _attributes_, as no other format used by serde has such a concept.
10//!
11//! Due to that the mapping is performed in a best effort manner.
12//!
13//!
14//!
15//! Table of Contents
16//! =================
17//! - [Mapping XML to Rust types](#mapping-xml-to-rust-types)
18//!   - [Basics](#basics)
19//!   - [Optional attributes and elements](#optional-attributes-and-elements)
20//!   - [Choices (`xs:choice` XML Schema type)](#choices-xschoice-xml-schema-type)
21//!   - [Sequences (`xs:all` and `xs:sequence` XML Schema types)](#sequences-xsall-and-xssequence-xml-schema-types)
22//! - [Mapping of `xsi:nil`](#mapping-of-xsinil)
23//! - [Generate Rust types from XML](#generate-rust-types-from-xml)
24//! - [Composition Rules](#composition-rules)
25//! - [Enum Representations](#enum-representations)
26//!   - [Normal enum variant](#normal-enum-variant)
27//!   - [`$text` enum variant](#text-enum-variant)
28//! - [`$text` and `$value` special names](#text-and-value-special-names)
29//!   - [`$text`](#text)
30//!   - [`$value`](#value)
31//!     - [Primitives and sequences of primitives](#primitives-and-sequences-of-primitives)
32//!     - [Structs and sequences of structs](#structs-and-sequences-of-structs)
33//!     - [Enums and sequences of enums](#enums-and-sequences-of-enums)
34//! - [Frequently Used Patterns](#frequently-used-patterns)
35//!   - [`<element>` lists](#element-lists)
36//!   - [Overlapped (Out-of-Order) Elements](#overlapped-out-of-order-elements)
37//!   - [Internally Tagged Enums](#internally-tagged-enums)
38//!
39//!
40//!
41//! Mapping XML to Rust types
42//! =========================
43//!
44//! Type names are never considered when deserializing, so you can name your
45//! types as you wish. Other general rules:
46//! - `struct` field name could be represented in XML only as an attribute name
47//!   or an element name;
48//! - `enum` variant name could be represented in XML only as an attribute name
49//!   or an element name;
50//! - the unit struct, unit type `()` and unit enum variant can be deserialized
51//!   from any valid XML content:
52//!   - attribute and element names;
53//!   - attribute and element values;
54//!   - text or CDATA content (including mixed text and CDATA content).
55//!
56//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
57//!
58//! NOTE: All tests are marked with an `ignore` option, even though they do
59//! compile. This is because rustdoc marks such blocks with an information
60//! icon unlike `no_run` blocks.
61//!
62//! </div>
63//!
64//! <table>
65//! <thead>
66//! <tr><th colspan="2">
67//!
68//! ## Basics
69//!
70//! </th></tr>
71//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
72//! </thead>
73//! <tbody style="vertical-align:top;">
74//! <tr>
75//! <td>
76//! Content of attributes and text / CDATA content of elements (including mixed
77//! text and CDATA content):
78//!
79//! ```xml
80//! <... ...="content" />
81//! ```
82//! ```xml
83//! <...>content</...>
84//! ```
85//! ```xml
86//! <...><![CDATA[content]]></...>
87//! ```
88//! ```xml
89//! <...>text<![CDATA[cdata]]>text</...>
90//! ```
91//! Mixed text / CDATA content represents one logical string, `"textcdatatext"` in that case.
92//! </td>
93//! <td>
94//!
95//! You can use any type that can be deserialized from an `&str`, for example:
96//! - [`String`] and [`&str`]
97//! - [`Cow<str>`]
98//! - [`u32`], [`f32`] and other numeric types
99//! - `enum`s, like
100//!   ```
101//!   # use pretty_assertions::assert_eq;
102//!   # use serde::Deserialize;
103//!   # #[derive(Debug, PartialEq)]
104//!   #[derive(Deserialize)]
105//!   enum Language {
106//!     Rust,
107//!     Cpp,
108//!     #[serde(other)]
109//!     Other,
110//!   }
111//!   # #[derive(Debug, PartialEq, Deserialize)]
112//!   # struct X { #[serde(rename = "$text")] x: Language }
113//!   # assert_eq!(X { x: Language::Rust  }, quick_xml::de::from_str("<x>Rust</x>").unwrap());
114//!   # assert_eq!(X { x: Language::Cpp   }, quick_xml::de::from_str("<x>C<![CDATA[p]]>p</x>").unwrap());
115//!   # assert_eq!(X { x: Language::Other }, quick_xml::de::from_str("<x><![CDATA[other]]></x>").unwrap());
116//!   ```
117//!
118//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
119//!
120//! NOTE: deserialization to non-owned types (i.e. types that borrow from the
121//! input), such as `&str`, is possible only if you parse a document in the UTF-8
122//! encoding, the content does not contain entity references such as `&amp;` or
123//! character references such as `&#xD;`, and the text content is represented
124//! by a single piece of [text] or [CDATA] element.
125//! </div>
126//! <!-- TODO: document an error type returned -->
127//!
128//! [text]: Event::Text
129//! [CDATA]: Event::CData
130//! </td>
131//! </tr>
132//! <!-- 2 ===================================================================================== -->
133//! <tr>
134//! <td>
135//!
136//! Content of attributes and text / CDATA content of elements (including mixed
137//! text and CDATA content), which represents a space-delimited list, as
138//! specified in the XML Schema specification for [`xs:list`] `simpleType`:
139//!
140//! ```xml
141//! <... ...="element1 element2 ..." />
142//! ```
143//! ```xml
144//! <...>
145//!   element1
146//!   element2
147//!   ...
148//! </...>
149//! ```
150//! ```xml
151//! <...><![CDATA[
152//!   element1
153//!   element2
154//!   ...
155//! ]]></...>
156//! ```
157//!
158//! [`xs:list`]: https://www.w3.org/TR/xmlschema11-2/#list-datatypes
159//! </td>
160//! <td>
161//!
162//! Use any type that is deserialized using a [`deserialize_seq()`] call, for example:
163//!
164//! ```
165//! type List = Vec<u32>;
166//! ```
167//!
168//! See the next row to learn where in your struct definition you should
169//! use that type.
170//!
171//! According to the XML Schema specification, the delimiter between elements is
172//! one or more whitespace (`' '`, `'\r'`, `'\n'`, and `'\t'`) characters.
173//!
174//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
175//!
176//! NOTE: according to the XML Schema restrictions, you cannot escape those
177//! white-space characters, so list elements will _never_ contain them.
178//! In practice you will usually use `xs:list`s for lists of numbers or enumerated
179//! values which look like identifiers in many languages, for example, `item`,
180//! `some_item` or `some-item`, so that shouldn't be a problem.
181//!
182//! NOTE: according to the XML Schema specification, list elements can be
183//! delimited only by spaces. Other delimiters (for example, commas) are not
184//! allowed.
185//!
186//! </div>
187//!
188//! [`deserialize_seq()`]: de::Deserializer::deserialize_seq
189//! </td>
190//! </tr>
191//! <!-- 3 ===================================================================================== -->
192//! <tr>
193//! <td>
194//! A typical XML with attributes. The root tag name does not matter:
195//!
196//! ```xml
197//! <any-tag one="..." two="..."/>
198//! ```
199//! </td>
200//! <td>
201//!
202//! A structure where each XML attribute is mapped to a field with a name
203//! starting with `@`. Because Rust identifiers do not permit the `@` character,
204//! you should use the `#[serde(rename = "@...")]` attribute to rename it.
205//! The name of the struct itself does not matter:
206//!
207//! ```
208//! # use serde::Deserialize;
209//! # type T = ();
210//! # type U = ();
211//! // Get both attributes
212//! # #[derive(Debug, PartialEq)]
213//! #[derive(Deserialize)]
214//! struct AnyName {
215//!   #[serde(rename = "@one")]
216//!   one: T,
217//!
218//!   #[serde(rename = "@two")]
219//!   two: U,
220//! }
221//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
222//! ```
223//! ```
224//! # use serde::Deserialize;
225//! # type T = ();
226//! // Get only the one attribute, ignore the other
227//! # #[derive(Debug, PartialEq)]
228//! #[derive(Deserialize)]
229//! struct AnyName {
230//!   #[serde(rename = "@one")]
231//!   one: T,
232//! }
233//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
234//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."/>"#).unwrap();
235//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
236//! ```
237//! ```
238//! # use serde::Deserialize;
239//! // Ignore all attributes
240//! // You can also use the `()` type (unit type)
241//! # #[derive(Debug, PartialEq)]
242//! #[derive(Deserialize)]
243//! struct AnyName;
244//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
245//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
246//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
247//! ```
248//!
249//! All these structs can be used to deserialize from an XML on the
250//! left side depending on the amount of information that you want to get.
251//! Of course, you can combine them with elements extractor structs (see below).
252//!
253//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
254//!
255//! NOTE: XML allows you to have an attribute and an element with the same name
256//! inside the same element. quick-xml deals with that by prepending a `@` prefix
257//! to the name of attributes.
258//! </div>
259//! </td>
260//! </tr>
261//! <!-- 4 ===================================================================================== -->
262//! <tr>
263//! <td>
264//! A typical XML with child elements. The root tag name does not matter:
265//!
266//! ```xml
267//! <any-tag>
268//!   <one>...</one>
269//!   <two>...</two>
270//! </any-tag>
271//! ```
272//! </td>
273//! <td>
274//! A structure where each XML child element is mapped to the field.
275//! Each element name becomes a name of field. The name of the struct itself
276//! does not matter:
277//!
278//! ```
279//! # use serde::Deserialize;
280//! # type T = ();
281//! # type U = ();
282//! // Get both elements
283//! # #[derive(Debug, PartialEq)]
284//! #[derive(Deserialize)]
285//! struct AnyName {
286//!   one: T,
287//!   two: U,
288//! }
289//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
290//! #
291//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap_err();
292//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"#).unwrap_err();
293//! ```
294//! ```
295//! # use serde::Deserialize;
296//! # type T = ();
297//! // Get only the one element, ignore the other
298//! # #[derive(Debug, PartialEq)]
299//! #[derive(Deserialize)]
300//! struct AnyName {
301//!   one: T,
302//! }
303//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
304//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
305//! ```
306//! ```
307//! # use serde::Deserialize;
308//! // Ignore all elements
309//! // You can also use the `()` type (unit type)
310//! # #[derive(Debug, PartialEq)]
311//! #[derive(Deserialize)]
312//! struct AnyName;
313//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
314//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
315//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"#).unwrap();
316//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
317//! ```
318//!
319//! All these structs can be used to deserialize from an XML on the
320//! left side depending on the amount of information that you want to get.
321//! Of course, you can combine them with attributes extractor structs (see above).
322//!
323//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
324//!
325//! NOTE: XML allows you to have an attribute and an element with the same name
326//! inside the same element. quick-xml deals with that by prepending a `@` prefix
327//! to the name of attributes.
328//! </div>
329//! </td>
330//! </tr>
331//! <!-- 5 ===================================================================================== -->
332//! <tr>
333//! <td>
334//! An XML with an attribute and a child element named equally:
335//!
336//! ```xml
337//! <any-tag field="...">
338//!   <field>...</field>
339//! </any-tag>
340//! ```
341//! </td>
342//! <td>
343//!
344//! You MUST specify `#[serde(rename = "@field")]` on a field that will be used
345//! for an attribute:
346//!
347//! ```
348//! # use pretty_assertions::assert_eq;
349//! # use serde::Deserialize;
350//! # type T = ();
351//! # type U = ();
352//! # #[derive(Debug, PartialEq)]
353//! #[derive(Deserialize)]
354//! struct AnyName {
355//!   #[serde(rename = "@field")]
356//!   attribute: T,
357//!   field: U,
358//! }
359//! # assert_eq!(
360//! #   AnyName { attribute: (), field: () },
361//! #   quick_xml::de::from_str(r#"
362//! #     <any-tag field="...">
363//! #       <field>...</field>
364//! #     </any-tag>
365//! #   "#).unwrap(),
366//! # );
367//! ```
368//! </td>
369//! </tr>
370//! <!-- ======================================================================================= -->
371//! <tr><th colspan="2">
372//!
373//! ## Optional attributes and elements
374//!
375//! </th></tr>
376//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
377//! <!-- 6 ===================================================================================== -->
378//! <tr>
379//! <td>
380//! An optional XML attribute that you want to capture.
381//! The root tag name does not matter:
382//!
383//! ```xml
384//! <any-tag optional="..."/>
385//! ```
386//! ```xml
387//! <any-tag/>
388//! ```
389//! </td>
390//! <td>
391//!
392//! A structure with an optional field, renamed according to the requirements
393//! for attributes:
394//!
395//! ```
396//! # use pretty_assertions::assert_eq;
397//! # use serde::Deserialize;
398//! # type T = ();
399//! # #[derive(Debug, PartialEq)]
400//! #[derive(Deserialize)]
401//! struct AnyName {
402//!   #[serde(rename = "@optional")]
403//!   optional: Option<T>,
404//! }
405//! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag optional="..."/>"#).unwrap());
406//! # assert_eq!(AnyName { optional: None     }, quick_xml::de::from_str(r#"<any-tag/>"#).unwrap());
407//! ```
408//! When the XML attribute is present, type `T` will be deserialized from
409//! an attribute value (which is a string). Note, that if `T = String` or other
410//! string type, the empty attribute is mapped to a `Some("")`, whereas `None`
411//! represents the missing attribute:
412//! ```xml
413//! <any-tag optional="..."/><!-- Some("...") -->
414//! <any-tag optional=""/>   <!-- Some("") -->
415//! <any-tag/>               <!-- None -->
416//! ```
417//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
418//!
419//! NOTE: The behaviour is not symmetric by default. `None` will be serialized as
420//! `optional=""`. This behaviour is consistent across serde crates. You should add
421//! `#[serde(skip_serializing_if = "Option::is_none")]` attribute to the field to
422//! skip `None`s.
423//! </div>
424//! </td>
425//! </tr>
426//! <!-- 7 ===================================================================================== -->
427//! <tr>
428//! <td>
429//! An optional XML element that you want to capture.
430//! The root tag name does not matter:
431//!
432//! ```xml
433//! <any-tag>
434//!   <optional>...</optional>
435//! </any-tag>
436//! ```
437//! ```xml
438//! <any-tag>
439//!   <optional/>
440//! </any-tag>
441//! ```
442//! ```xml
443//! <any-tag/>
444//! ```
445//! </td>
446//! <td>
447//!
448//! A structure with an optional field:
449//!
450//! ```
451//! # use pretty_assertions::assert_eq;
452//! # use serde::Deserialize;
453//! # type T = ();
454//! # #[derive(Debug, PartialEq)]
455//! #[derive(Deserialize)]
456//! struct AnyName {
457//!   optional: Option<T>,
458//! }
459//! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag><optional>...</optional></any-tag>"#).unwrap());
460//! # assert_eq!(AnyName { optional: None     }, quick_xml::de::from_str(r#"<any-tag/>"#).unwrap());
461//! ```
462//! When the XML element is present, type `T` will be deserialized from an
463//! element (which is a string or a multi-mapping -- i.e. mapping which can have
464//! duplicated keys).
465//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
466//!
467//! NOTE: The behaviour is not symmetric by default. `None` will be serialized as
468//! `<optional/>`. This behaviour is consistent across serde crates. You should add
469//! `#[serde(skip_serializing_if = "Option::is_none")]` attribute to the field to
470//! skip `None`s.
471//!
472//! NOTE: Deserializer will automatically handle a [`xsi:nil`] attribute and set field to `None`.
473//! For more info see [Mapping of `xsi:nil`](#mapping-of-xsinil).
474//! </div>
475//! </td>
476//! </tr>
477//! <!-- ======================================================================================= -->
478//! <tr><th colspan="2">
479//!
480//! ## Choices (`xs:choice` XML Schema type)
481//!
482//! </th></tr>
483//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
484//! <!-- 8 ===================================================================================== -->
485//! <tr>
486//! <td>
487//! An XML with different root tag names, as well as text / CDATA content:
488//!
489//! ```xml
490//! <one field1="...">...</one>
491//! ```
492//! ```xml
493//! <two>
494//!   <field2>...</field2>
495//! </two>
496//! ```
497//! ```xml
498//! Text <![CDATA[or (mixed)
499//! CDATA]]> content
500//! ```
501//! </td>
502//! <td>
503//!
504//! An enum where each variant has the name of a possible root tag. The name of
505//! the enum itself does not matter.
506//!
507//! If you need to get the textual content, mark a variant with `#[serde(rename = "$text")]`.
508//!
509//! All these structs can be used to deserialize from any XML on the
510//! left side depending on the amount of information that you want to get:
511//!
512//! ```
513//! # use pretty_assertions::assert_eq;
514//! # use serde::Deserialize;
515//! # type T = ();
516//! # type U = ();
517//! # #[derive(Debug, PartialEq)]
518//! #[derive(Deserialize)]
519//! #[serde(rename_all = "snake_case")]
520//! enum AnyName {
521//!   One { #[serde(rename = "@field1")] field1: T },
522//!   Two { field2: U },
523//!
524//!   /// Use unit variant, if you do not care about the content.
525//!   /// You can use tuple variant if you want to parse
526//!   /// textual content as an xs:list.
527//!   /// Struct variants will pass a string to the
528//!   /// struct enum variant visitor, which typically
529//!   /// returns Err(Custom)
530//!   #[serde(rename = "$text")]
531//!   Text(String),
532//! }
533//! # assert_eq!(AnyName::One { field1: () }, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
534//! # assert_eq!(AnyName::Two { field2: () }, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
535//! # assert_eq!(AnyName::Text("text  cdata ".into()), quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
536//! ```
537//! ```
538//! # use pretty_assertions::assert_eq;
539//! # use serde::Deserialize;
540//! # type T = ();
541//! # #[derive(Debug, PartialEq)]
542//! #[derive(Deserialize)]
543//! struct Two {
544//!   field2: T,
545//! }
546//! # #[derive(Debug, PartialEq)]
547//! #[derive(Deserialize)]
548//! #[serde(rename_all = "snake_case")]
549//! enum AnyName {
550//!   // `field1` content discarded
551//!   One,
552//!   Two(Two),
553//!   #[serde(rename = "$text")]
554//!   Text,
555//! }
556//! # assert_eq!(AnyName::One,                     quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
557//! # assert_eq!(AnyName::Two(Two { field2: () }), quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
558//! # assert_eq!(AnyName::Text,                    quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
559//! ```
560//! ```
561//! # use pretty_assertions::assert_eq;
562//! # use serde::Deserialize;
563//! # #[derive(Debug, PartialEq)]
564//! #[derive(Deserialize)]
565//! #[serde(rename_all = "snake_case")]
566//! enum AnyName {
567//!   One,
568//!   // the <two> and textual content will be mapped to this
569//!   #[serde(other)]
570//!   Other,
571//! }
572//! # assert_eq!(AnyName::One,   quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
573//! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
574//! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
575//! ```
576//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
577//!
578//! NOTE: You should have variants for all possible tag names in your enum
579//! or have an `#[serde(other)]` variant.
580//! <!-- TODO: document an error type if that requirement is violated -->
581//! </div>
582//! </td>
583//! </tr>
584//! <!-- 9 ===================================================================================== -->
585//! <tr>
586//! <td>
587//!
588//! `<xs:choice>` embedded in another element, and at the same time you want
589//! to get access to other attributes that can appear in the same container
590//! (`<any-tag>`). This case can also be described as choosing a Rust enum
591//! variant based on a tag name:
592//!
593//! ```xml
594//! <any-tag field="...">
595//!   <one>...</one>
596//! </any-tag>
597//! ```
598//! ```xml
599//! <any-tag field="...">
600//!   <two>...</two>
601//! </any-tag>
602//! ```
603//! ```xml
604//! <any-tag field="...">
605//!   Text <![CDATA[or (mixed)
606//!   CDATA]]> content
607//! </any-tag>
608//! ```
609//! </td>
610//! <td>
611//!
612//! A structure with a field whose type is an `enum`.
613//!
614//! If you need to get a textual content, mark a variant with `#[serde(rename = "$text")]`.
615//!
616//! Names of the enum, struct, and struct field with `Choice` type do not matter:
617//!
618//! ```
619//! # use pretty_assertions::assert_eq;
620//! # use serde::Deserialize;
621//! # type T = ();
622//! # #[derive(Debug, PartialEq)]
623//! #[derive(Deserialize)]
624//! #[serde(rename_all = "snake_case")]
625//! enum Choice {
626//!   One,
627//!   Two,
628//!
629//!   /// Use unit variant, if you do not care about the content.
630//!   /// You can use tuple variant if you want to parse
631//!   /// textual content as an xs:list.
632//!   /// Struct variants will pass a string to the
633//!   /// struct enum variant visitor, which typically
634//!   /// returns Err(Custom)
635//!   #[serde(rename = "$text")]
636//!   Text(String),
637//! }
638//! # #[derive(Debug, PartialEq)]
639//! #[derive(Deserialize)]
640//! struct AnyName {
641//!   #[serde(rename = "@field")]
642//!   field: T,
643//!
644//!   #[serde(rename = "$value")]
645//!   any_name: Choice,
646//! }
647//! # assert_eq!(
648//! #   AnyName { field: (), any_name: Choice::One },
649//! #   quick_xml::de::from_str(r#"<any-tag field="..."><one>...</one></any-tag>"#).unwrap(),
650//! # );
651//! # assert_eq!(
652//! #   AnyName { field: (), any_name: Choice::Two },
653//! #   quick_xml::de::from_str(r#"<any-tag field="..."><two>...</two></any-tag>"#).unwrap(),
654//! # );
655//! # assert_eq!(
656//! #   AnyName { field: (), any_name: Choice::Text("text  cdata ".into()) },
657//! #   quick_xml::de::from_str(r#"<any-tag field="...">text <![CDATA[ cdata ]]></any-tag>"#).unwrap(),
658//! # );
659//! ```
660//! </td>
661//! </tr>
662//! <!-- 10 ==================================================================================== -->
663//! <tr>
664//! <td>
665//!
666//! `<xs:choice>` embedded in another element, and at the same time you want
667//! to get access to other elements that can appear in the same container
668//! (`<any-tag>`). This case can also be described as choosing a Rust enum
669//! variant based on a tag name:
670//!
671//! ```xml
672//! <any-tag>
673//!   <field>...</field>
674//!   <one>...</one>
675//! </any-tag>
676//! ```
677//! ```xml
678//! <any-tag>
679//!   <two>...</two>
680//!   <field>...</field>
681//! </any-tag>
682//! ```
683//! </td>
684//! <td>
685//!
686//! A structure with a field whose type is an `enum`.
687//!
688//! Names of the enum, struct, and struct field with `Choice` type do not matter:
689//!
690//! ```
691//! # use pretty_assertions::assert_eq;
692//! # use serde::Deserialize;
693//! # type T = ();
694//! # #[derive(Debug, PartialEq)]
695//! #[derive(Deserialize)]
696//! #[serde(rename_all = "snake_case")]
697//! enum Choice {
698//!   One,
699//!   Two,
700//! }
701//! # #[derive(Debug, PartialEq)]
702//! #[derive(Deserialize)]
703//! struct AnyName {
704//!   field: T,
705//!
706//!   #[serde(rename = "$value")]
707//!   any_name: Choice,
708//! }
709//! # assert_eq!(
710//! #   AnyName { field: (), any_name: Choice::One },
711//! #   quick_xml::de::from_str(r#"<any-tag><field>...</field><one>...</one></any-tag>"#).unwrap(),
712//! # );
713//! # assert_eq!(
714//! #   AnyName { field: (), any_name: Choice::Two },
715//! #   quick_xml::de::from_str(r#"<any-tag><two>...</two><field>...</field></any-tag>"#).unwrap(),
716//! # );
717//! ```
718//!
719//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
720//!
721//! NOTE: if your `Choice` enum would contain an `#[serde(other)]`
722//! variant, element `<field>` will be mapped to the `field` and not to the enum
723//! variant.
724//! </div>
725//!
726//! </td>
727//! </tr>
728//! <!-- 11 ==================================================================================== -->
729//! <tr>
730//! <td>
731//!
732//! `<xs:choice>` encapsulated in another element with a fixed name:
733//!
734//! ```xml
735//! <any-tag field="...">
736//!   <choice>
737//!     <one>...</one>
738//!   </choice>
739//! </any-tag>
740//! ```
741//! ```xml
742//! <any-tag field="...">
743//!   <choice>
744//!     <two>...</two>
745//!   </choice>
746//! </any-tag>
747//! ```
748//! </td>
749//! <td>
750//!
751//! A structure with a field of an intermediate type with one field of `enum` type.
752//! Actually, this example is not necessary, because you can construct it by yourself
753//! using the composition rules that were described above. However the XML construction
754//! described here is very common, so it is shown explicitly.
755//!
756//! Names of the enum and struct do not matter:
757//!
758//! ```
759//! # use pretty_assertions::assert_eq;
760//! # use serde::Deserialize;
761//! # type T = ();
762//! # #[derive(Debug, PartialEq)]
763//! #[derive(Deserialize)]
764//! #[serde(rename_all = "snake_case")]
765//! enum Choice {
766//!   One,
767//!   Two,
768//! }
769//! # #[derive(Debug, PartialEq)]
770//! #[derive(Deserialize)]
771//! struct Holder {
772//!   #[serde(rename = "$value")]
773//!   any_name: Choice,
774//! }
775//! # #[derive(Debug, PartialEq)]
776//! #[derive(Deserialize)]
777//! struct AnyName {
778//!   #[serde(rename = "@field")]
779//!   field: T,
780//!
781//!   choice: Holder,
782//! }
783//! # assert_eq!(
784//! #   AnyName { field: (), choice: Holder { any_name: Choice::One } },
785//! #   quick_xml::de::from_str(r#"<any-tag field="..."><choice><one>...</one></choice></any-tag>"#).unwrap(),
786//! # );
787//! # assert_eq!(
788//! #   AnyName { field: (), choice: Holder { any_name: Choice::Two } },
789//! #   quick_xml::de::from_str(r#"<any-tag field="..."><choice><two>...</two></choice></any-tag>"#).unwrap(),
790//! # );
791//! ```
792//! </td>
793//! </tr>
794//! <!-- 12 ==================================================================================== -->
795//! <tr>
796//! <td>
797//!
798//! `<xs:choice>` encapsulated in another element with a fixed name:
799//!
800//! ```xml
801//! <any-tag>
802//!   <field>...</field>
803//!   <choice>
804//!     <one>...</one>
805//!   </choice>
806//! </any-tag>
807//! ```
808//! ```xml
809//! <any-tag>
810//!   <choice>
811//!     <two>...</two>
812//!   </choice>
813//!   <field>...</field>
814//! </any-tag>
815//! ```
816//! </td>
817//! <td>
818//!
819//! A structure with a field of an intermediate type with one field of `enum` type.
820//! Actually, this example is not necessary, because you can construct it by yourself
821//! using the composition rules that were described above. However the XML construction
822//! described here is very common, so it is shown explicitly.
823//!
824//! Names of the enum and struct do not matter:
825//!
826//! ```
827//! # use pretty_assertions::assert_eq;
828//! # use serde::Deserialize;
829//! # type T = ();
830//! # #[derive(Debug, PartialEq)]
831//! #[derive(Deserialize)]
832//! #[serde(rename_all = "snake_case")]
833//! enum Choice {
834//!   One,
835//!   Two,
836//! }
837//! # #[derive(Debug, PartialEq)]
838//! #[derive(Deserialize)]
839//! struct Holder {
840//!   #[serde(rename = "$value")]
841//!   any_name: Choice,
842//! }
843//! # #[derive(Debug, PartialEq)]
844//! #[derive(Deserialize)]
845//! struct AnyName {
846//!   field: T,
847//!
848//!   choice: Holder,
849//! }
850//! # assert_eq!(
851//! #   AnyName { field: (), choice: Holder { any_name: Choice::One } },
852//! #   quick_xml::de::from_str(r#"<any-tag><field>...</field><choice><one>...</one></choice></any-tag>"#).unwrap(),
853//! # );
854//! # assert_eq!(
855//! #   AnyName { field: (), choice: Holder { any_name: Choice::Two } },
856//! #   quick_xml::de::from_str(r#"<any-tag><choice><two>...</two></choice><field>...</field></any-tag>"#).unwrap(),
857//! # );
858//! ```
859//! </td>
860//! </tr>
861//! <!-- ======================================================================================== -->
862//! <tr><th colspan="2">
863//!
864//! ## Sequences (`xs:all` and `xs:sequence` XML Schema types)
865//!
866//! </th></tr>
867//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
868//! <!-- 13 ==================================================================================== -->
869//! <tr>
870//! <td>
871//! A sequence inside of a tag without a dedicated name:
872//!
873//! ```xml
874//! <any-tag/>
875//! ```
876//! ```xml
877//! <any-tag>
878//!   <item/>
879//! </any-tag>
880//! ```
881//! ```xml
882//! <any-tag>
883//!   <item/>
884//!   <item/>
885//!   <item/>
886//! </any-tag>
887//! ```
888//! </td>
889//! <td>
890//!
891//! A structure with a field which is a sequence type, for example, [`Vec`].
892//! Because XML syntax does not distinguish between empty sequences and missing
893//! elements, we should indicate that on the Rust side, because serde will require
894//! that field `item` exists. You can do that in two possible ways:
895//!
896//! Use the `#[serde(default)]` attribute for a [field] or the entire [struct]:
897//! ```
898//! # use pretty_assertions::assert_eq;
899//! # use serde::Deserialize;
900//! # type Item = ();
901//! # #[derive(Debug, PartialEq)]
902//! #[derive(Deserialize)]
903//! struct AnyName {
904//!   #[serde(default)]
905//!   item: Vec<Item>,
906//! }
907//! # assert_eq!(
908//! #   AnyName { item: vec![] },
909//! #   quick_xml::de::from_str(r#"<any-tag/>"#).unwrap(),
910//! # );
911//! # assert_eq!(
912//! #   AnyName { item: vec![()] },
913//! #   quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"#).unwrap(),
914//! # );
915//! # assert_eq!(
916//! #   AnyName { item: vec![(), (), ()] },
917//! #   quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"#).unwrap(),
918//! # );
919//! ```
920//!
921//! Use an [`Option`]. In that case the inner array will always contain at least one
922//! element after deserialization:
923//! ```ignore
924//! # use pretty_assertions::assert_eq;
925//! # use serde::Deserialize;
926//! # type Item = ();
927//! # #[derive(Debug, PartialEq)]
928//! #[derive(Deserialize)]
929//! struct AnyName {
930//!   item: Option<Vec<Item>>,
931//! }
932//! # assert_eq!(
933//! #   AnyName { item: None },
934//! #   quick_xml::de::from_str(r#"<any-tag/>"#).unwrap(),
935//! # );
936//! # assert_eq!(
937//! #   AnyName { item: Some(vec![()]) },
938//! #   quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"#).unwrap(),
939//! # );
940//! # assert_eq!(
941//! #   AnyName { item: Some(vec![(), (), ()]) },
942//! #   quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"#).unwrap(),
943//! # );
944//! ```
945//!
946//! See also [Frequently Used Patterns](#element-lists).
947//!
948//! [field]: https://serde.rs/field-attrs.html#default
949//! [struct]: https://serde.rs/container-attrs.html#default
950//! </td>
951//! </tr>
952//! <!-- 14 ==================================================================================== -->
953//! <tr>
954//! <td>
955//! A sequence with a strict order, probably with mixed content
956//! (text / CDATA and tags):
957//!
958//! ```xml
959//! <one>...</one>
960//! text
961//! <![CDATA[cdata]]>
962//! <two>...</two>
963//! <one>...</one>
964//! ```
965//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
966//!
967//! NOTE: this is just an example for showing mapping. XML does not allow
968//! multiple root tags -- you should wrap the sequence into a tag.
969//! </div>
970//! </td>
971//! <td>
972//!
973//! All elements are mapped to a heterogeneous sequential type: a tuple or a named tuple.
974//! Each element of the tuple should be able to be deserialized from the nested
975//! element content (`...`), except for enum types, which are deserialized
976//! from the full element (`<one>...</one>`), so that they can use the element name
977//! to choose the right variant:
978//!
979//! ```
980//! # use pretty_assertions::assert_eq;
981//! # use serde::Deserialize;
982//! # type One = ();
983//! # type Two = ();
984//! # /*
985//! type One = ...;
986//! type Two = ...;
987//! # */
988//! # #[derive(Debug, PartialEq)]
989//! #[derive(Deserialize)]
990//! struct AnyName(One, String, Two, One);
991//! # assert_eq!(
992//! #   AnyName((), "text cdata".into(), (), ()),
993//! #   quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
994//! # );
995//! ```
996//! ```
997//! # use pretty_assertions::assert_eq;
998//! # use serde::Deserialize;
999//! # #[derive(Debug, PartialEq)]
1000//! #[derive(Deserialize)]
1001//! #[serde(rename_all = "snake_case")]
1002//! enum Choice {
1003//!   One,
1004//! }
1005//! # type Two = ();
1006//! # /*
1007//! type Two = ...;
1008//! # */
1009//! type AnyName = (Choice, String, Two, Choice);
1010//! # assert_eq!(
1011//! #   (Choice::One, "text cdata".to_string(), (), Choice::One),
1012//! #   quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
1013//! # );
1014//! ```
1015//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1016//!
1017//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1018//! so you cannot have two adjacent string types in your sequence.
1019//!
1020//! NOTE: If the tags of the list may be interleaved (overlapped) with tags that
1021//! do not belong to the list, you should enable the [`overlapped-lists`] feature.
1022//! </div>
1023//! </td>
1024//! </tr>
1025//! <!-- 15 ==================================================================================== -->
1026//! <tr>
1027//! <td>
1028//! A sequence with a non-strict order, probably with a mixed content
1029//! (text / CDATA and tags).
1030//!
1031//! ```xml
1032//! <one>...</one>
1033//! text
1034//! <![CDATA[cdata]]>
1035//! <two>...</two>
1036//! <one>...</one>
1037//! ```
1038//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1039//!
1040//! NOTE: this is just an example for showing mapping. XML does not allow
1041//! multiple root tags -- you should wrap the sequence into a tag.
1042//! </div>
1043//! </td>
1044//! <td>
1045//! A homogeneous sequence of elements with a fixed or dynamic size:
1046//!
1047//! ```
1048//! # use pretty_assertions::assert_eq;
1049//! # use serde::Deserialize;
1050//! # #[derive(Debug, PartialEq)]
1051//! #[derive(Deserialize)]
1052//! #[serde(rename_all = "snake_case")]
1053//! enum Choice {
1054//!   One,
1055//!   Two,
1056//!   #[serde(other)]
1057//!   Other,
1058//! }
1059//! type AnyName = [Choice; 4];
1060//! # assert_eq!(
1061//! #   [Choice::One, Choice::Other, Choice::Two, Choice::One],
1062//! #   quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
1063//! # );
1064//! ```
1065//! ```
1066//! # use pretty_assertions::assert_eq;
1067//! # use serde::Deserialize;
1068//! # #[derive(Debug, PartialEq)]
1069//! #[derive(Deserialize)]
1070//! #[serde(rename_all = "snake_case")]
1071//! enum Choice {
1072//!   One,
1073//!   Two,
1074//!   #[serde(rename = "$text")]
1075//!   Other(String),
1076//! }
1077//! type AnyName = Vec<Choice>;
1078//! # assert_eq!(
1079//! #   vec![
1080//! #     Choice::One,
1081//! #     Choice::Other("text cdata".into()),
1082//! #     Choice::Two,
1083//! #     Choice::One,
1084//! #   ],
1085//! #   quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
1086//! # );
1087//! ```
1088//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1089//!
1090//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1091//! so you cannot have two adjacent string types in your sequence.
1092//! </div>
1093//! </td>
1094//! </tr>
1095//! <!-- 16 ==================================================================================== -->
1096//! <tr>
1097//! <td>
1098//! A sequence with a strict order, probably with a mixed content,
1099//! (text and tags) inside of the other element:
1100//!
1101//! ```xml
1102//! <any-tag attribute="...">
1103//!   <one>...</one>
1104//!   text
1105//!   <![CDATA[cdata]]>
1106//!   <two>...</two>
1107//!   <one>...</one>
1108//! </any-tag>
1109//! ```
1110//! </td>
1111//! <td>
1112//!
1113//! A structure where all child elements are mapped to one field which has
1114//! a heterogeneous sequential type: a tuple or a named tuple. Each element of the
1115//! tuple should be able to be deserialized from the full element (`<one>...</one>`).
1116//!
1117//! You MUST specify `#[serde(rename = "$value")]` on that field:
1118//!
1119//! ```
1120//! # use pretty_assertions::assert_eq;
1121//! # use serde::Deserialize;
1122//! # type One = ();
1123//! # type Two = ();
1124//! # /*
1125//! type One = ...;
1126//! type Two = ...;
1127//! # */
1128//!
1129//! # #[derive(Debug, PartialEq)]
1130//! #[derive(Deserialize)]
1131//! struct AnyName {
1132//!   #[serde(rename = "@attribute")]
1133//! # attribute: (),
1134//! # /*
1135//!   attribute: ...,
1136//! # */
1137//!   // Not (yet?) supported by serde
1138//!   // https://github.com/serde-rs/serde/issues/1905
1139//!   // #[serde(flatten)]
1140//!   #[serde(rename = "$value")]
1141//!   any_name: (One, String, Two, One),
1142//! }
1143//! # assert_eq!(
1144//! #   AnyName { attribute: (), any_name: ((), "text cdata".into(), (), ()) },
1145//! #   quick_xml::de::from_str("\
1146//! #     <any-tag attribute='...'>\
1147//! #       <one>...</one>\
1148//! #       text \
1149//! #       <![CDATA[cdata]]>\
1150//! #       <two>...</two>\
1151//! #       <one>...</one>\
1152//! #     </any-tag>"
1153//! #   ).unwrap(),
1154//! # );
1155//! ```
1156//! ```
1157//! # use pretty_assertions::assert_eq;
1158//! # use serde::Deserialize;
1159//! # type One = ();
1160//! # type Two = ();
1161//! # /*
1162//! type One = ...;
1163//! type Two = ...;
1164//! # */
1165//!
1166//! # #[derive(Debug, PartialEq)]
1167//! #[derive(Deserialize)]
1168//! struct NamedTuple(One, String, Two, One);
1169//!
1170//! # #[derive(Debug, PartialEq)]
1171//! #[derive(Deserialize)]
1172//! struct AnyName {
1173//!   #[serde(rename = "@attribute")]
1174//! # attribute: (),
1175//! # /*
1176//!   attribute: ...,
1177//! # */
1178//!   // Not (yet?) supported by serde
1179//!   // https://github.com/serde-rs/serde/issues/1905
1180//!   // #[serde(flatten)]
1181//!   #[serde(rename = "$value")]
1182//!   any_name: NamedTuple,
1183//! }
1184//! # assert_eq!(
1185//! #   AnyName { attribute: (), any_name: NamedTuple((), "text cdata".into(), (), ()) },
1186//! #   quick_xml::de::from_str("\
1187//! #     <any-tag attribute='...'>\
1188//! #       <one>...</one>\
1189//! #       text \
1190//! #       <![CDATA[cdata]]>\
1191//! #       <two>...</two>\
1192//! #       <one>...</one>\
1193//! #     </any-tag>"
1194//! #   ).unwrap(),
1195//! # );
1196//! ```
1197//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1198//!
1199//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1200//! so you cannot have two adjacent string types in your sequence.
1201//! </div>
1202//! </td>
1203//! </tr>
1204//! <!-- 17 ==================================================================================== -->
1205//! <tr>
1206//! <td>
1207//! A sequence with a non-strict order, probably with a mixed content
1208//! (text / CDATA and tags) inside of the other element:
1209//!
1210//! ```xml
1211//! <any-tag>
1212//!   <one>...</one>
1213//!   text
1214//!   <![CDATA[cdata]]>
1215//!   <two>...</two>
1216//!   <one>...</one>
1217//! </any-tag>
1218//! ```
1219//! </td>
1220//! <td>
1221//!
1222//! A structure where all child elements are mapped to one field which has
1223//! a homogeneous sequential type: an array-like container. The item type `T`
1224//! of the container should be able to be deserialized from the nested element
1225//! content (`...`), except if it is an enum type, which is deserialized from
1226//! the full element (`<one>...</one>`).
1227//!
1228//! You MUST specify `#[serde(rename = "$value")]` on that field:
1229//!
1230//! ```
1231//! # use pretty_assertions::assert_eq;
1232//! # use serde::Deserialize;
1233//! # #[derive(Debug, PartialEq)]
1234//! #[derive(Deserialize)]
1235//! #[serde(rename_all = "snake_case")]
1236//! enum Choice {
1237//!   One,
1238//!   Two,
1239//!   #[serde(rename = "$text")]
1240//!   Other(String),
1241//! }
1242//! # #[derive(Debug, PartialEq)]
1243//! #[derive(Deserialize)]
1244//! struct AnyName {
1245//!   #[serde(rename = "@attribute")]
1246//! # attribute: (),
1247//! # /*
1248//!   attribute: ...,
1249//! # */
1250//!   // Not (yet?) supported by serde
1251//!   // https://github.com/serde-rs/serde/issues/1905
1252//!   // #[serde(flatten)]
1253//!   #[serde(rename = "$value")]
1254//!   any_name: [Choice; 4],
1255//! }
1256//! # assert_eq!(
1257//! #   AnyName { attribute: (), any_name: [
1258//! #     Choice::One,
1259//! #     Choice::Other("text cdata".into()),
1260//! #     Choice::Two,
1261//! #     Choice::One,
1262//! #   ] },
1263//! #   quick_xml::de::from_str("\
1264//! #     <any-tag attribute='...'>\
1265//! #       <one>...</one>\
1266//! #       text \
1267//! #       <![CDATA[cdata]]>\
1268//! #       <two>...</two>\
1269//! #       <one>...</one>\
1270//! #     </any-tag>"
1271//! #   ).unwrap(),
1272//! # );
1273//! ```
1274//! ```
1275//! # use pretty_assertions::assert_eq;
1276//! # use serde::Deserialize;
1277//! # #[derive(Debug, PartialEq)]
1278//! #[derive(Deserialize)]
1279//! #[serde(rename_all = "snake_case")]
1280//! enum Choice {
1281//!   One,
1282//!   Two,
1283//!   #[serde(rename = "$text")]
1284//!   Other(String),
1285//! }
1286//! # #[derive(Debug, PartialEq)]
1287//! #[derive(Deserialize)]
1288//! struct AnyName {
1289//!   #[serde(rename = "@attribute")]
1290//! # attribute: (),
1291//! # /*
1292//!   attribute: ...,
1293//! # */
1294//!   // Not (yet?) supported by serde
1295//!   // https://github.com/serde-rs/serde/issues/1905
1296//!   // #[serde(flatten)]
1297//!   #[serde(rename = "$value")]
1298//!   any_name: Vec<Choice>,
1299//! }
1300//! # assert_eq!(
1301//! #   AnyName { attribute: (), any_name: vec![
1302//! #     Choice::One,
1303//! #     Choice::Other("text cdata".into()),
1304//! #     Choice::Two,
1305//! #     Choice::One,
1306//! #   ] },
1307//! #   quick_xml::de::from_str("\
1308//! #     <any-tag attribute='...'>\
1309//! #       <one>...</one>\
1310//! #       text \
1311//! #       <![CDATA[cdata]]>\
1312//! #       <two>...</two>\
1313//! #       <one>...</one>\
1314//! #     </any-tag>"
1315//! #   ).unwrap(),
1316//! # );
1317//! ```
1318//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1319//!
1320//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1321//! so you cannot have two adjacent string types in your sequence.
1322//! </div>
1323//! </td>
1324//! </tr>
1325//! </tbody>
1326//! </table>
1327//!
1328//!
1329//! Mapping of `xsi:nil`
1330//! ====================
1331//!
1332//! quick-xml supports handling of the special [`xsi:nil`] attribute. When a field of an optional
1333//! type is mapped to an XML element which has `xsi:nil="true"` set, or if that attribute
1334//! is placed on the parent XML element, the deserializer will call [`Visitor::visit_none`]
1335//! and skip the XML element corresponding to the field.
1336//!
1337//! Examples:
1338//!
1339//! ```
1340//! # use pretty_assertions::assert_eq;
1341//! # use serde::Deserialize;
1342//! #[derive(Deserialize, Debug, PartialEq)]
1343//! struct TypeWithOptionalField {
1344//!   element: Option<String>,
1345//! }
1346//!
1347//! assert_eq!(
1348//!   TypeWithOptionalField {
1349//!     element: None,
1350//!   },
1351//!   quick_xml::de::from_str("
1352//!     <any-tag xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'>
1353//!       <element xsi:nil='true'>Content is skipped because of xsi:nil='true'</element>
1354//!     </any-tag>
1355//!   ").unwrap(),
1356//! );
1357//! ```
1358//!
1359//! You can capture attributes from the optional type, because `xsi:nil="true"` elements can have
1360//! attributes:
1361//! ```
1362//! # use pretty_assertions::assert_eq;
1363//! # use serde::Deserialize;
1364//! #[derive(Deserialize, Debug, PartialEq)]
1365//! struct TypeWithOptionalField {
1366//!   #[serde(rename = "@attribute")]
1367//!   attribute: usize,
1368//!
1369//!   element: Option<String>,
1370//!   non_optional: String,
1371//! }
1372//!
1373//! assert_eq!(
1374//!   TypeWithOptionalField {
1375//!     attribute: 42,
1376//!     element: None,
1377//!     non_optional: "Note, that non-optional fields will be deserialized as usual".to_string(),
1378//!   },
1379//!   quick_xml::de::from_str("
1380//!     <any-tag attribute='42' xsi:nil='true' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'>
1381//!       <element>Content is skipped because of xsi:nil='true'</element>
1382//!       <non_optional>Note, that non-optional fields will be deserialized as usual</non_optional>
1383//!     </any-tag>
1384//!   ").unwrap(),
1385//! );
1386//! ```
1387//!
1388//! Generate Rust types from XML
1389//! ============================
1390//!
1391//! To speed up the creation of Rust types that represent a given XML file you can
1392//! use the [xml_schema_generator](https://github.com/Thomblin/xml_schema_generator).
1393//! It provides a standalone binary and a Rust library that parses one or more XML files
1394//! and generates a collection of structs that are compatible with quick_xml::de.
1395//!
1396//!
1397//!
1398//! Composition Rules
1399//! =================
1400//!
1401//! The XML format is very different from other formats supported by `serde`.
1402//! One such difference is how data in the serialized form is related to
1403//! the Rust type. Usually each byte in the data can be associated only with
1404//! one field in the data structure. However, XML is an exception.
1405//!
1406//! For example, take this XML:
1407//!
1408//! ```xml
1409//! <any>
1410//!   <key attr="value"/>
1411//! </any>
1412//! ```
1413//!
1414//! and try to deserialize it to the struct `AnyName`:
1415//!
1416//! ```no_run
1417//! # use serde::Deserialize;
1418//! #[derive(Deserialize)]
1419//! struct AnyName { // AnyName calls `deserialize_struct` on `<any><key attr="value"/></any>`
1420//!                  //                         Used data:          ^^^^^^^^^^^^^^^^^^^
1421//!   key: Inner,    // Inner   calls `deserialize_struct` on `<key attr="value"/>`
1422//!                  //                         Used data:          ^^^^^^^^^^^^
1423//! }
1424//! #[derive(Deserialize)]
1425//! struct Inner {
1426//!   #[serde(rename = "@attr")]
1427//!   attr: String,  // String  calls `deserialize_string` on `value`
1428//!                  //                         Used data:     ^^^^^
1429//! }
1430//! ```
1431//!
1432//! The comments show which methods of a [`Deserializer`] are called by each struct's
1433//! `deserialize` method and which input they see. **Used data** shows what
1434//! content is actually used for deserializing. As you can see, the name of the inner
1435//! `<key>` tag is used both as a map key / outer struct field name and as part
1436//! of the inner struct (although the _value_ of the tag, i.e. `key`, is not used
1437//! by it).
1438//!
1439//!
1440//!
1441//! Enum Representations
1442//! ====================
1443//!
1444//! `quick-xml` represents enums differently in normal fields, `$text` fields and
1445//! `$value` fields. A normal representation is compatible with serde's adjacent
1446//! and internal tags feature -- tags for adjacently and internally tagged enums
1447//! are serialized using [`Serializer::serialize_unit_variant`] and deserialized
1448//! using [`Deserializer::deserialize_enum`].
1449//!
1450//! Use these simple rules to remember how an enum would be represented in XML:
1451//! - In a `$value` field the representation is always the same as the top-level representation;
1452//! - In a `$text` field the representation is always the same as in a normal field,
1453//!   but the surrounding tags with the field name are removed;
1454//! - In a normal field the representation always contains a tag with the field name.
1455//!
1456//! Normal enum variant
1457//! -------------------
1458//!
1459//! To model an `xs:choice` XML construct use `$value` field.
1460//! To model a top-level `xs:choice` just use the enum type.
1461//!
1462//! |Kind   |Top-level and in `$value` field          |In normal field      |In `$text` field     |
1463//! |-------|-----------------------------------------|---------------------|---------------------|
1464//! |Unit   |`<Unit/>`                                |`<field>Unit</field>`|`Unit`               |
1465//! |Newtype|`<Newtype>42</Newtype>`                  |Err(Custom) [^0]     |Err(Custom) [^0]     |
1466//! |Tuple  |`<Tuple>42</Tuple><Tuple>answer</Tuple>` |Err(Custom) [^0]     |Err(Custom) [^0]     |
1467//! |Struct |`<Struct><q>42</q><a>answer</a></Struct>`|Err(Custom) [^0]     |Err(Custom) [^0]     |
1468//!
1469//! `$text` enum variant
1470//! --------------------
1471//!
1472//! |Kind   |Top-level and in `$value` field          |In normal field      |In `$text` field     |
1473//! |-------|-----------------------------------------|---------------------|---------------------|
1474//! |Unit   |_(empty)_                                |`<field/>`           |_(empty)_            |
1475//! |Newtype|`42`                                     |Err(Custom) [^0] [^1]|Err(Custom) [^0] [^2]|
1476//! |Tuple  |`42 answer`                              |Err(Custom) [^0] [^3]|Err(Custom) [^0] [^4]|
1477//! |Struct |Err(Custom) [^0]                         |Err(Custom) [^0]     |Err(Custom) [^0]     |
1478//!
1479//! [^0]: The error is returned by the deserialized type. In the case of a derived implementation
1480//!       a `Custom` error will be returned, but a custom deserialize implementation can
1481//!       successfully deserialize the value from the string which will be passed to it.
1482//!
1483//! [^1]: If this were serialized as `<field>42</field>`, deserialization would be ambiguous,
1484//!       because it would clash with the `Unit` representation in a normal field.
1485//!
1486//! [^2]: If this were serialized as `42`, deserialization would be ambiguous,
1487//!       because it would clash with the `Unit` representation in a `$text` field.
1488//!
1489//! [^3]: If this were serialized as `<field>42 answer</field>`, deserialization would be ambiguous,
1490//!       because it would clash with the `Unit` representation in a normal field.
1491//!
1492//! [^4]: If this were serialized as `42 answer`, deserialization would be ambiguous,
1493//!       because it would clash with the `Unit` representation in a `$text` field.
1494//!
1495//!
1496//!
1497//! `$text` and `$value` special names
1498//! ==================================
1499//!
1500//! quick-xml supports two special names for fields -- `$text` and `$value`.
1501//! Although they may seem the same, there is a distinction. Two different
1502//! names are required mostly for serialization, because quick-xml should know
1503//! how you want to serialize certain constructs, which could be represented
1504//! through XML in multiple different ways.
1505//!
1506//! The only difference is in how complex types and sequences are serialized.
1507//! If you doubt which one you should select, begin with [`$value`](#value).
1508//!
1509//! ## `$text`
1510//! `$text` is used when you want to write your XML as text or CDATA content.
1511//! More formally, a field with that name represents a simple type definition with
1512//! `{variety} = atomic` or `{variety} = union` whose basic members are all atomic,
1513//! as described in the [specification].
1514//!
1515//! As a result, not all types of such fields can be serialized. Only serialization
1516//! of the following types is supported:
1517//! - all primitive types (strings, numbers, booleans)
1518//! - unit variants of enumerations (serializes to a name of a variant)
1519//! - newtypes (delegates serialization to inner type)
1520//! - [`Option`] of above (`None` serializes to nothing)
1521//! - sequences (including tuples and tuple variants of enumerations) of above,
1522//!   excluding `None` and empty string elements (because it will not be possible
1523//!   to deserialize them back). The elements are separated by space(s)
1524//! - unit type `()` and unit structs (serializes to nothing)
1525//!
1526//! Complex types, such as structs and maps, are not supported in this field.
1527//! If you want them, you should use `$value`.
1528//!
1529//! Sequences are serialized to a space-delimited string, which is why only certain
1530//! types are allowed in this mode:
1531//!
1532//! ```
1533//! # use serde::{Deserialize, Serialize};
1534//! # use quick_xml::de::from_str;
1535//! # use quick_xml::se::to_string;
1536//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1537//! struct AnyName {
1538//!     #[serde(rename = "$text")]
1539//!     field: Vec<usize>,
1540//! }
1541//!
1542//! let obj = AnyName { field: vec![1, 2, 3] };
1543//! let xml = to_string(&obj).unwrap();
1544//! assert_eq!(xml, "<AnyName>1 2 3</AnyName>");
1545//!
1546//! let object: AnyName = from_str(&xml).unwrap();
1547//! assert_eq!(object, obj);
1548//! ```
1549//!
1550//! ## `$value`
1551//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1552//!
1553//! NOTE: a name `#content` would better explain the purpose of that field,
1554//! but `$value` is used for compatibility with other XML serde crates, which
1555//! use that name. This will allow you to switch XML crates more smoothly if required.
1556//! </div>
1557//!
1558//! Representation of primitive types in `$value` does not differ from their
1559//! representation in `$text` field. The difference is how sequences are serialized.
1560//! `$value` serializes each sequence item as a separate XML element. The name
1561//! of that element is taken from serialized type, and because only `enum`s provide
1562//! such name (their variant name), only they should be used for such fields.
1563//!
1564//! `$value` fields do not support `struct` types with fields; the serialization
1565//! of such types would end with an `Err(Unsupported)`. Unit structs and the unit
1566//! type `()` serialize to nothing and can be deserialized from any content.
1567//!
1568//! Serialization and deserialization of a `$value` field is performed as usual, except
1569//! that the name for an XML element will be given by the serialized type, instead of
1570//! the field. The latter allows serializing enumerated types, where the variant is encoded
1571//! as a tag name, and so representing an XSD `xs:choice` schema by a Rust `enum`.
1572//!
1573//! In the example below, `field` will be serialized as `<field>A</field>`,
1574//! `<field>B</field>` or `<field>C</field>`: the element gets its name from the
1575//! field name, and the variant name is written as the text content of that element:
1576//!
1577//! ```
1578//! # use serde::{Deserialize, Serialize};
1579//! # use pretty_assertions::assert_eq;
1580//! # #[derive(PartialEq, Debug)]
1581//! #[derive(Deserialize, Serialize)]
1582//! enum Enum { A, B, C }
1583//!
1584//! # #[derive(PartialEq, Debug)]
1585//! #[derive(Deserialize, Serialize)]
1586//! struct AnyName {
1587//!     // <field>A</field>, <field>B</field>, or <field>C</field>
1588//!     field: Enum,
1589//! }
1590//! # assert_eq!(
1591//! #     quick_xml::se::to_string(&AnyName { field: Enum::A }).unwrap(),
1592//! #     "<AnyName><field>A</field></AnyName>",
1593//! # );
1594//! # assert_eq!(
1595//! #     AnyName { field: Enum::B },
1596//! #     quick_xml::de::from_str("<root><field>B</field></root>").unwrap(),
1597//! # );
1598//! ```
1599//!
1600//! If you rename the field to `$value`, then `field` would be serialized as `<A/>`,
1601//! `<B/>` or `<C/>`, depending on its content. It is also possible to
1602//! deserialize it from the same elements:
1603//!
1604//! ```
1605//! # use serde::{Deserialize, Serialize};
1606//! # use pretty_assertions::assert_eq;
1607//! # #[derive(Deserialize, Serialize, PartialEq, Debug)]
1608//! # enum Enum { A, B, C }
1609//! #
1610//! # #[derive(PartialEq, Debug)]
1611//! #[derive(Deserialize, Serialize)]
1612//! struct AnyName {
1613//!     // <A/>, <B/> or <C/>
1614//!     #[serde(rename = "$value")]
1615//!     field: Enum,
1616//! }
1617//! # assert_eq!(
1618//! #     quick_xml::se::to_string(&AnyName { field: Enum::A }).unwrap(),
1619//! #     "<AnyName><A/></AnyName>",
1620//! # );
1621//! # assert_eq!(
1622//! #     AnyName { field: Enum::B },
1623//! #     quick_xml::de::from_str("<root><B/></root>").unwrap(),
1624//! # );
1625//! ```
1626//!
1627//! ### Primitives and sequences of primitives
1628//!
1629//! Sequences are serialized to a list of elements. Note that sequences of types that
1630//! do not produce their own tag (i.e. primitives) will produce [`SeError::Unsupported`]
1631//! if they contain more than one element, because such a sequence cannot be
1632//! deserialized to the same value:
1633//!
1634//! ```
1635//! # use serde::{Deserialize, Serialize};
1636//! # use pretty_assertions::assert_eq;
1637//! # use quick_xml::de::from_str;
1638//! # use quick_xml::se::to_string;
1639//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1640//! struct AnyName {
1641//!     #[serde(rename = "$value")]
1642//!     field: Vec<usize>,
1643//! }
1644//!
1645//! let obj = AnyName { field: vec![1, 2, 3] };
1646//! // If this object were serialized, it would be represented as "<AnyName>123</AnyName>"
1647//! to_string(&obj).unwrap_err();
1648//!
1649//! let object: AnyName = from_str("<AnyName>123</AnyName>").unwrap();
1650//! assert_eq!(object, AnyName { field: vec![123] });
1651//!
1652//! // `1 2 3` is mapped to a single `usize` element
1653//! // It is impossible to deserialize list of primitives to such field
1654//! from_str::<AnyName>("<AnyName>1 2 3</AnyName>").unwrap_err();
1655//! ```
1656//!
1657//! A particular case of that example is a string `$value` field, which probably
1658//! would be the most used example of that attribute:
1659//!
1660//! ```
1661//! # use serde::{Deserialize, Serialize};
1662//! # use pretty_assertions::assert_eq;
1663//! # use quick_xml::de::from_str;
1664//! # use quick_xml::se::to_string;
1665//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1666//! struct AnyName {
1667//!     #[serde(rename = "$value")]
1668//!     field: String,
1669//! }
1670//!
1671//! let obj = AnyName { field: "content".to_string() };
1672//! let xml = to_string(&obj).unwrap();
1673//! assert_eq!(xml, "<AnyName>content</AnyName>");
1674//! ```
1675//!
1676//! ### Structs and sequences of structs
1677//!
1678//! Note that structures do not have a serializable name either (the name of the
1679//! type is never used), so it is impossible to serialize a non-unit struct or
1680//! a sequence of non-unit structs in a `$value` field. Unit structs (and sequences
1681//! of them) are serialized as an empty string, because units themselves serialize
1682//! to nothing:
1683//!
1684//! ```
1685//! # use serde::{Deserialize, Serialize};
1686//! # use pretty_assertions::assert_eq;
1687//! # use quick_xml::de::from_str;
1688//! # use quick_xml::se::to_string;
1689//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1690//! struct Unit;
1691//!
1692//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1693//! struct AnyName {
1694//!     // #[serde(default)] is required for deserialization of empty lists
1695//!     // This is a general note, not related to $value
1696//!     #[serde(rename = "$value", default)]
1697//!     field: Vec<Unit>,
1698//! }
1699//!
1700//! let obj = AnyName { field: vec![Unit, Unit, Unit] };
1701//! let xml = to_string(&obj).unwrap();
1702//! assert_eq!(xml, "<AnyName/>");
1703//!
1704//! let object: AnyName = from_str("<AnyName/>").unwrap();
1705//! assert_eq!(object, AnyName { field: vec![] });
1706//!
1707//! let object: AnyName = from_str("<AnyName></AnyName>").unwrap();
1708//! assert_eq!(object, AnyName { field: vec![] });
1709//!
1710//! let object: AnyName = from_str("<AnyName><A/><B/><C/></AnyName>").unwrap();
1711//! assert_eq!(object, AnyName { field: vec![Unit, Unit, Unit] });
1712//! ```
1713//!
1714//! ### Enums and sequences of enums
1715//!
1716//! Enumerations use the variant name as an element name:
1717//!
1718//! ```
1719//! # use serde::{Deserialize, Serialize};
1720//! # use pretty_assertions::assert_eq;
1721//! # use quick_xml::de::from_str;
1722//! # use quick_xml::se::to_string;
1723//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1724//! struct AnyName {
1725//!     #[serde(rename = "$value")]
1726//!     field: Vec<Enum>,
1727//! }
1728//!
1729//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1730//! enum Enum { A, B, C }
1731//!
1732//! let obj = AnyName { field: vec![Enum::A, Enum::B, Enum::C] };
1733//! let xml = to_string(&obj).unwrap();
1734//! assert_eq!(
1735//!     xml,
1736//!     "<AnyName>\
1737//!         <A/>\
1738//!         <B/>\
1739//!         <C/>\
1740//!      </AnyName>"
1741//! );
1742//!
1743//! let object: AnyName = from_str(&xml).unwrap();
1744//! assert_eq!(object, obj);
1745//! ```
1746//!
1747//! ----------------------------------------------------------------------------
1748//!
1749//! You can have either `$text` or `$value` field in your structs. Unfortunately,
1750//! that is not enforced, so you can theoretically have both, but you should
1751//! avoid that.
1752//!
1753//!
1754//!
1755//! Frequently Used Patterns
1756//! ========================
1757//!
//! Some XML constructs are used so frequently that it is worth documenting the
//! recommended way to represent them in Rust. The sections below describe them.
1760//!
1761//! `<element>` lists
1762//! -----------------
//! Many XML formats wrap lists of elements in an additional container,
//! although this is not required by the XML rules:
1765//!
1766//! ```xml
1767//! <root>
1768//!   <field1/>
1769//!   <field2/>
1770//!   <list><!-- Container -->
1771//!     <element/>
1772//!     <element/>
1773//!     <element/>
1774//!   </list>
1775//!   <field3/>
1776//! </root>
1777//! ```
1778//! In this case, there is a great desire to describe this XML in this way:
1779//! ```
1780//! /// Represents <element/>
1781//! type Element = ();
1782//!
1783//! /// Represents <root>...</root>
1784//! struct AnyName {
1785//!     // Incorrect
1786//!     list: Vec<Element>,
1787//! }
1788//! ```
//! This will not work, because the `<list>` element can potentially have attributes
//! and other elements inside. You should define the struct for the `<list>`
//! explicitly, as you would do in the XSD for that XML:
1792//! ```
1793//! /// Represents <element/>
1794//! type Element = ();
1795//!
1796//! /// Represents <root>...</root>
1797//! struct AnyName {
1798//!     // Correct
1799//!     list: List,
1800//! }
1801//! /// Represents <list>...</list>
1802//! struct List {
1803//!     element: Vec<Element>,
1804//! }
1805//! ```
1806//!
//! If you want to simplify your API, you could write a simple function that unwraps
//! the inner list and apply it via [`deserialize_with`]:
1809//!
1810//! ```
1811//! # use pretty_assertions::assert_eq;
1812//! use quick_xml::de::from_str;
1813//! use serde::{Deserialize, Deserializer};
1814//!
1815//! /// Represents <element/>
1816//! type Element = ();
1817//!
1818//! /// Represents <root>...</root>
1819//! #[derive(Deserialize, Debug, PartialEq)]
1820//! struct AnyName {
1821//!     #[serde(deserialize_with = "unwrap_list")]
1822//!     list: Vec<Element>,
1823//! }
1824//!
1825//! fn unwrap_list<'de, D>(deserializer: D) -> Result<Vec<Element>, D::Error>
1826//! where
1827//!     D: Deserializer<'de>,
1828//! {
1829//!     /// Represents <list>...</list>
1830//!     #[derive(Deserialize)]
1831//!     struct List {
1832//!         // default allows empty list
1833//!         #[serde(default)]
1834//!         element: Vec<Element>,
1835//!     }
1836//!     Ok(List::deserialize(deserializer)?.element)
1837//! }
1838//!
1839//! assert_eq!(
1840//!     AnyName { list: vec![(), (), ()] },
1841//!     from_str("
1842//!         <root>
1843//!           <list>
1844//!             <element/>
1845//!             <element/>
1846//!             <element/>
1847//!           </list>
1848//!         </root>
1849//!     ").unwrap(),
1850//! );
1851//! ```
1852//!
1853//! Instead of writing such functions manually, you also could try <https://lib.rs/crates/serde-query>.
1854//!
1855//! Overlapped (Out-of-Order) Elements
1856//! ----------------------------------
1857//! In the case that the list might contain tags that are overlapped with
1858//! tags that do not correspond to the list (this is a usual case in XML
1859//! documents) like this:
1860//! ```xml
1861//! <any-name>
1862//!   <item/>
1863//!   <another-item/>
1864//!   <item/>
1865//!   <item/>
1866//! </any-name>
1867//! ```
1868//! you should enable the [`overlapped-lists`] feature to make it possible
1869//! to deserialize this to:
1870//! ```no_run
1871//! # use serde::Deserialize;
1872//! #[derive(Deserialize)]
1873//! #[serde(rename_all = "kebab-case")]
1874//! struct AnyName {
1875//!     item: Vec<()>,
1876//!     another_item: (),
1877//! }
1878//! ```
1879//!
1880//!
1881//! Internally Tagged Enums
1882//! -----------------------
1883//! [Tagged enums] are currently not supported because of an issue in the Serde
1884//! design (see [serde#1183] and [quick-xml#586]) and missing optimizations in
1885//! Serde which could be useful for XML parsing ([serde#1495]). This can be worked
1886//! around by manually implementing deserialize with `#[serde(deserialize_with = "func")]`
1887//! or implementing [`Deserialize`], but this can get very tedious very fast for
1888//! files with large amounts of tagged enums. To help with this issue quick-xml
1889//! provides a macro [`impl_deserialize_for_internally_tagged_enum!`]. See the
1890//! macro documentation for details.
1891//!
1892//!
1893//! [`overlapped-lists`]: ../index.html#overlapped-lists
1894//! [specification]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition
1895//! [`deserialize_with`]: https://serde.rs/field-attrs.html#deserialize_with
1896//! [`xsi:nil`]: https://www.w3.org/TR/xmlschema-1/#xsi_nil
1897//! [`Serializer::serialize_unit_variant`]: serde::Serializer::serialize_unit_variant
1898//! [`Deserializer::deserialize_enum`]: serde::Deserializer::deserialize_enum
1899//! [`SeError::Unsupported`]: crate::errors::serialize::SeError::Unsupported
1900//! [Tagged enums]: https://serde.rs/enum-representations.html#internally-tagged
1901//! [serde#1183]: https://github.com/serde-rs/serde/issues/1183
1902//! [serde#1495]: https://github.com/serde-rs/serde/issues/1495
1903//! [quick-xml#586]: https://github.com/tafia/quick-xml/issues/586
1904//! [`impl_deserialize_for_internally_tagged_enum!`]: crate::impl_deserialize_for_internally_tagged_enum
1905
// Macros should be defined before the modules that use them
// Also, macros should be imported before using them
1908use serde::serde_if_integer128;
1909
/// Generates a `$deserialize` method that reads the content as a string and
/// tries to parse it as a number.
///
/// On success the parsed number is handed to the visitor's `$visit` method.
/// If parsing fails, the raw text is handed to the visitor as a string instead
/// (`visit_str` for borrowed text, `visit_string` for owned text).
///
/// The optional `$mut` token is placed in front of the generated `self`
/// parameter, for callers whose `read_string` needs a mutable receiver.
macro_rules! deserialize_num {
    ($deserialize:ident => $visit:ident, $($mut:tt)?) => {
        fn $deserialize<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            // No need to unescape because valid integer representations cannot be escaped
            let raw = self.read_string()?;

            // Happy path: the text is a valid number of the requested type
            if let Ok(number) = raw.parse() {
                return visitor.$visit(number);
            }

            // Not a number: let the visitor decide what to do with the text
            match raw {
                Cow::Borrowed(borrowed) => visitor.visit_str(borrowed),
                Cow::Owned(owned) => visitor.visit_string(owned),
            }
        }
    };
}
1928
/// Generates the string-based and forwarding `Deserializer` methods shared by
/// several deserializers: numbers, booleans, characters, strings, byte arrays
/// and identifiers, plus simple forwarders for unit structs, tuples, tuple
/// structs, maps and ignored values.
///
/// The optional `$mut` token is placed in front of `self` in the methods that
/// call `read_string` directly.
macro_rules! deserialize_primitives {
    ($($mut:tt)?) => {
        // Signed integers
        deserialize_num!(deserialize_i8 => visit_i8, $($mut)?);
        deserialize_num!(deserialize_i16 => visit_i16, $($mut)?);
        deserialize_num!(deserialize_i32 => visit_i32, $($mut)?);
        deserialize_num!(deserialize_i64 => visit_i64, $($mut)?);

        // Unsigned integers
        deserialize_num!(deserialize_u8 => visit_u8, $($mut)?);
        deserialize_num!(deserialize_u16 => visit_u16, $($mut)?);
        deserialize_num!(deserialize_u32 => visit_u32, $($mut)?);
        deserialize_num!(deserialize_u64 => visit_u64, $($mut)?);

        // 128-bit integers
        serde_if_integer128! {
            deserialize_num!(deserialize_i128 => visit_i128, $($mut)?);
            deserialize_num!(deserialize_u128 => visit_u128, $($mut)?);
        }

        // Floating point numbers
        deserialize_num!(deserialize_f32 => visit_f32, $($mut)?);
        deserialize_num!(deserialize_f64 => visit_f64, $($mut)?);

        /// Reads the content as a string and delegates the boolean parsing
        /// to `CowRef::deserialize_bool`.
        fn deserialize_bool<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            let content = self.read_string()?;
            let content = match content {
                Cow::Borrowed(input) => CowRef::Input(input),
                Cow::Owned(owned) => CowRef::Owned(owned),
            };
            content.deserialize_bool(visitor)
        }

        /// A character is represented the same way as a [string](#method.deserialize_str).
        #[inline]
        fn deserialize_char<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        /// Reads the content as a string and passes it to the visitor: borrowed
        /// text via `visit_borrowed_str`, owned text via `visit_string`.
        fn deserialize_str<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            match self.read_string()? {
                Cow::Borrowed(borrowed) => visitor.visit_borrowed_str(borrowed),
                Cow::Owned(owned) => visitor.visit_string(owned),
            }
        }

        /// An owned string is represented the same way as a [non-owned](#method.deserialize_str) one.
        #[inline]
        fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        /// Forwards deserialization to [`deserialize_any`](#method.deserialize_any).
        #[inline]
        fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_any(visitor)
        }

        /// Forwards deserialization to [`deserialize_bytes`](#method.deserialize_bytes).
        #[inline]
        fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_bytes(visitor)
        }

        /// A named unit is represented the same way as an [unnamed unit](#method.deserialize_unit);
        /// the name is ignored.
        #[inline]
        fn deserialize_unit_struct<V>(
            self,
            _name: &'static str,
            visitor: V,
        ) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_unit(visitor)
        }

        /// A tuple is represented the same way as a [sequence](#method.deserialize_seq);
        /// the length is ignored.
        #[inline]
        fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_seq(visitor)
        }

        /// A named tuple is represented the same way as an [unnamed tuple](#method.deserialize_tuple);
        /// the name is ignored.
        #[inline]
        fn deserialize_tuple_struct<V>(
            self,
            _name: &'static str,
            len: usize,
            visitor: V,
        ) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_tuple(len, visitor)
        }

        /// Forwards deserialization to [`deserialize_struct`](#method.deserialize_struct)
        /// with an empty name and an empty list of fields.
        #[inline]
        fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_struct("", &[], visitor)
        }

        /// An identifier is represented the same way as a [string](#method.deserialize_str).
        #[inline]
        fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        /// Forwards deserialization to [`deserialize_unit`](#method.deserialize_unit).
        #[inline]
        fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_unit(visitor)
        }
    };
}
2074
2075mod key;
2076mod map;
2077mod resolver;
2078mod simple_type;
2079mod text;
2080mod var;
2081
2082pub use self::resolver::{EntityResolver, PredefinedEntityResolver};
2083pub use self::simple_type::SimpleTypeDeserializer;
2084pub use crate::errors::serialize::DeError;
2085
2086use crate::{
2087    de::map::ElementMapAccess,
2088    encoding::Decoder,
2089    errors::Error,
2090    events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
2091    name::QName,
2092    reader::NsReader,
2093    utils::CowRef,
2094};
2095use serde::de::{
2096    self, Deserialize, DeserializeOwned, DeserializeSeed, IntoDeserializer, SeqAccess, Visitor,
2097};
2098use std::borrow::Cow;
2099#[cfg(feature = "overlapped-lists")]
2100use std::collections::VecDeque;
2101use std::io::BufRead;
2102use std::mem::replace;
2103#[cfg(feature = "overlapped-lists")]
2104use std::num::NonZeroUsize;
2105use std::ops::Deref;
2106
/// Special field name that maps to data represented by a text node or a CDATA
/// node. XML markup is not expected in such data.
pub(crate) const TEXT_KEY: &str = "$text";
/// Special field name that maps to data represented by any XML markup inside
/// an element.
pub(crate) const VALUE_KEY: &str = "$value";
2111
/// Decoded and concatenated content of consecutive [`Text`] and [`CData`]
/// events. _Consecutive_ means that the events follow each other directly or
/// are separated only by (any number of) [`Comment`] or [`PI`] events.
///
/// Internally the text is stored in a `Cow<str>`. Cloning the text is cheap
/// while it is borrowed, but copies the data when it is owned.
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
/// [`Comment`]: Event::Comment
/// [`PI`]: Event::PI
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Text<'a> {
    /// The text content, either borrowed or owned
    text: Cow<'a, str>,
}
2127
2128impl<'a> Deref for Text<'a> {
2129    type Target = str;
2130
2131    #[inline]
2132    fn deref(&self) -> &Self::Target {
2133        self.text.deref()
2134    }
2135}
2136
2137impl<'a> From<&'a str> for Text<'a> {
2138    #[inline]
2139    fn from(text: &'a str) -> Self {
2140        Self {
2141            text: Cow::Borrowed(text),
2142        }
2143    }
2144}
2145
2146impl<'a> From<String> for Text<'a> {
2147    #[inline]
2148    fn from(text: String) -> Self {
2149        Self {
2150            text: Cow::Owned(text),
2151        }
2152    }
2153}
2154
2155impl<'a> From<Cow<'a, str>> for Text<'a> {
2156    #[inline]
2157    fn from(text: Cow<'a, str>) -> Self {
2158        Self { text }
2159    }
2160}
2161
2162////////////////////////////////////////////////////////////////////////////////////////////////////
2163
/// Simplified event which contains only the variants that are used by the
/// deserializer.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum DeEvent<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Decoded and concatenated content of consecutive [`Text`] and [`CData`]
    /// events. _Consecutive_ means that the events follow each other directly
    /// or are separated only by (any number of) [`Comment`] or [`PI`] events.
    ///
    /// [`Text`]: Event::Text
    /// [`CData`]: Event::CData
    /// [`Comment`]: Event::Comment
    /// [`PI`]: Event::PI
    Text(Text<'a>),
    /// End of XML document.
    Eof,
}
2183
2184////////////////////////////////////////////////////////////////////////////////////////////////////
2185
/// Simplified event which contains only the variants that are used by the
/// deserializer, but in which [`Text`] events are not yet fully processed.
///
/// [`Text`] events should be trimmed if they are not surrounded by other
/// [`Text`] or [`CData`] events. This event contains an intermediate state of
/// a [`Text`] event, where it is trimmed from the start, but not from the end.
/// To trim the trailing spaces we have to look ahead by one deserializer event
/// (i. e. skip all comments and processing instructions).
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum PayloadEvent<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Escaped character data between tags.
    Text(BytesText<'a>),
    /// Unescaped character data stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
    DocType(BytesText<'a>),
    /// End of XML document.
    Eof,
}
2212
2213impl<'a> PayloadEvent<'a> {
2214    /// Ensures that all data is owned to extend the object's lifetime if necessary.
2215    #[inline]
2216    fn into_owned(self) -> PayloadEvent<'static> {
2217        match self {
2218            PayloadEvent::Start(e) => PayloadEvent::Start(e.into_owned()),
2219            PayloadEvent::End(e) => PayloadEvent::End(e.into_owned()),
2220            PayloadEvent::Text(e) => PayloadEvent::Text(e.into_owned()),
2221            PayloadEvent::CData(e) => PayloadEvent::CData(e.into_owned()),
2222            PayloadEvent::DocType(e) => PayloadEvent::DocType(e.into_owned()),
2223            PayloadEvent::Eof => PayloadEvent::Eof,
2224        }
2225    }
2226}
2227
/// An intermediate reader that consumes [`PayloadEvent`]s and produces final [`DeEvent`]s.
/// [`PayloadEvent::Text`] events that are followed by any event except
/// [`PayloadEvent::Text`] or [`PayloadEvent::CData`] are trimmed from the end.
struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = PredefinedEntityResolver> {
    /// A source of low-level XML events
    reader: R,
    /// Intermediate event that could be returned by the next call to `next()`.
    /// If that is a `Text` event then its leading spaces are already trimmed,
    /// but its trailing spaces are not. Trimming of the trailing spaces may be
    /// necessary before the event is returned
    lookahead: Result<PayloadEvent<'i>, DeError>,

    /// Used to resolve unknown entities that would otherwise cause the parser
    /// to return an [`EscapeError::UnrecognizedEntity`] error.
    ///
    /// [`EscapeError::UnrecognizedEntity`]: crate::escape::EscapeError::UnrecognizedEntity
    entity_resolver: E,
}
2246
2247impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
2248    fn new(mut reader: R, entity_resolver: E) -> Self {
2249        // Lookahead by one event immediately, so we do not need to check in the
2250        // loop if we need lookahead or not
2251        let lookahead = reader.next();
2252
2253        Self {
2254            reader,
2255            lookahead,
2256            entity_resolver,
2257        }
2258    }
2259
2260    /// Returns `true` if all events was consumed
2261    const fn is_empty(&self) -> bool {
2262        matches!(self.lookahead, Ok(PayloadEvent::Eof))
2263    }
2264
2265    /// Read next event and put it in lookahead, return the current lookahead
2266    #[inline(always)]
2267    fn next_impl(&mut self) -> Result<PayloadEvent<'i>, DeError> {
2268        replace(&mut self.lookahead, self.reader.next())
2269    }
2270
2271    /// Returns `true` when next event is not a text event in any form.
2272    #[inline(always)]
2273    const fn current_event_is_last_text(&self) -> bool {
2274        // If next event is a text or CDATA, we should not trim trailing spaces
2275        !matches!(
2276            self.lookahead,
2277            Ok(PayloadEvent::Text(_)) | Ok(PayloadEvent::CData(_))
2278        )
2279    }
2280
2281    /// Read all consequent [`Text`] and [`CData`] events until non-text event
2282    /// occurs. Content of all events would be appended to `result` and returned
2283    /// as [`DeEvent::Text`].
2284    ///
2285    /// [`Text`]: PayloadEvent::Text
2286    /// [`CData`]: PayloadEvent::CData
2287    fn drain_text(&mut self, mut result: Cow<'i, str>) -> Result<DeEvent<'i>, DeError> {
2288        loop {
2289            if self.current_event_is_last_text() {
2290                break;
2291            }
2292
2293            match self.next_impl()? {
2294                PayloadEvent::Text(mut e) => {
2295                    if self.current_event_is_last_text() {
2296                        // FIXME: Actually, we should trim after decoding text, but now we trim before
2297                        e.inplace_trim_end();
2298                    }
2299                    result
2300                        .to_mut()
2301                        .push_str(&e.unescape_with(|entity| self.entity_resolver.resolve(entity))?);
2302                }
2303                PayloadEvent::CData(e) => result.to_mut().push_str(&e.decode()?),
2304
2305                // SAFETY: current_event_is_last_text checks that event is Text or CData
2306                _ => unreachable!("Only `Text` and `CData` events can come here"),
2307            }
2308        }
2309        Ok(DeEvent::Text(Text { text: result }))
2310    }
2311
2312    /// Return an input-borrowing event.
2313    fn next(&mut self) -> Result<DeEvent<'i>, DeError> {
2314        loop {
2315            return match self.next_impl()? {
2316                PayloadEvent::Start(e) => Ok(DeEvent::Start(e)),
2317                PayloadEvent::End(e) => Ok(DeEvent::End(e)),
2318                PayloadEvent::Text(mut e) => {
2319                    if self.current_event_is_last_text() && e.inplace_trim_end() {
2320                        // FIXME: Actually, we should trim after decoding text, but now we trim before
2321                        continue;
2322                    }
2323                    self.drain_text(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
2324                }
2325                PayloadEvent::CData(e) => self.drain_text(e.decode()?),
2326                PayloadEvent::DocType(e) => {
2327                    self.entity_resolver
2328                        .capture(e)
2329                        .map_err(|err| DeError::Custom(format!("cannot parse DTD: {}", err)))?;
2330                    continue;
2331                }
2332                PayloadEvent::Eof => Ok(DeEvent::Eof),
2333            };
2334        }
2335    }
2336
2337    #[inline]
2338    fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
2339        match self.lookahead {
2340            // We pre-read event with the same name that is required to be skipped.
2341            // First call of `read_to_end` will end out pre-read event, the second
2342            // will consume other events
2343            Ok(PayloadEvent::Start(ref e)) if e.name() == name => {
2344                let result1 = self.reader.read_to_end(name);
2345                let result2 = self.reader.read_to_end(name);
2346
2347                // In case of error `next_impl` returns `Eof`
2348                let _ = self.next_impl();
2349                result1?;
2350                result2?;
2351            }
2352            // We pre-read event with the same name that is required to be skipped.
2353            // Because this is end event, we already consume the whole tree, so
2354            // nothing to do, just update lookahead
2355            Ok(PayloadEvent::End(ref e)) if e.name() == name => {
2356                let _ = self.next_impl();
2357            }
2358            Ok(_) => {
2359                let result = self.reader.read_to_end(name);
2360
2361                // In case of error `next_impl` returns `Eof`
2362                let _ = self.next_impl();
2363                result?;
2364            }
2365            // Read next lookahead event, unpack error from the current lookahead
2366            Err(_) => {
2367                self.next_impl()?;
2368            }
2369        }
2370        Ok(())
2371    }
2372
2373    #[inline]
2374    fn decoder(&self) -> Decoder {
2375        self.reader.decoder()
2376    }
2377}
2378
2379////////////////////////////////////////////////////////////////////////////////////////////////////
2380
2381/// Deserialize an instance of type `T` from a string of XML text.
2382pub fn from_str<'de, T>(s: &'de str) -> Result<T, DeError>
2383where
2384    T: Deserialize<'de>,
2385{
2386    let mut de = Deserializer::from_str(s);
2387    T::deserialize(&mut de)
2388}
2389
2390/// Deserialize from a reader. This method will do internal copies of data
2391/// read from `reader`. If you want have a `&str` input and want to borrow
2392/// as much as possible, use [`from_str`].
2393pub fn from_reader<R, T>(reader: R) -> Result<T, DeError>
2394where
2395    R: BufRead,
2396    T: DeserializeOwned,
2397{
2398    let mut de = Deserializer::from_reader(reader);
2399    T::deserialize(&mut de)
2400}
2401
2402////////////////////////////////////////////////////////////////////////////////////////////////////
2403
/// A structure that deserializes XML into Rust values.
pub struct Deserializer<'de, R, E: EntityResolver = PredefinedEntityResolver>
where
    R: XmlRead<'de>,
{
    /// An XML reader that streams events into this deserializer
    reader: XmlReader<'de, R, E>,

    /// When deserializing sequences we sometimes have to skip unwanted events.
    /// Those events should be stored and then replayed. This is a replay buffer
    /// that streams events while it is not empty. When it is exhausted, events
    /// will be requested from [`Self::reader`].
    #[cfg(feature = "overlapped-lists")]
    read: VecDeque<DeEvent<'de>>,
    /// When deserializing sequences we sometimes have to skip events, because XML
    /// is tolerant to the order of elements, and even if the order is strictly
    /// specified in the XSD (using `xs:sequence`), most XML parsers allow order
    /// violations. That means that the elements forming a sequence could be
    /// overlapped with other elements that are not related to that sequence.
    ///
    /// In order to support this, the deserializer will scan events, skip the
    /// unwanted ones and store them here. After a call to [`Self::start_replay()`]
    /// all events are moved from this buffer to [`Self::read`].
    #[cfg(feature = "overlapped-lists")]
    write: VecDeque<DeEvent<'de>>,
    /// Maximum number of events that can be skipped when processing sequences
    /// that occur out-of-order. This field is used to prevent potential
    /// denial-of-service (DoS) attacks which could cause infinite memory
    /// consumption when parsing a very large amount of XML into a sequence field.
    #[cfg(feature = "overlapped-lists")]
    limit: Option<NonZeroUsize>,

    /// Event read ahead by `peek()`, if any
    #[cfg(not(feature = "overlapped-lists"))]
    peek: Option<DeEvent<'de>>,

    /// Buffer to store attribute name as a field name exposed to serde consumers
    key_buf: String,
}
2442
2443impl<'de, R, E> Deserializer<'de, R, E>
2444where
2445    R: XmlRead<'de>,
2446    E: EntityResolver,
2447{
2448    /// Create an XML deserializer from one of the possible quick_xml input sources.
2449    ///
2450    /// Typically it is more convenient to use one of these methods instead:
2451    ///
2452    ///  - [`Deserializer::from_str`]
2453    ///  - [`Deserializer::from_reader`]
2454    fn new(reader: R, entity_resolver: E) -> Self {
2455        Self {
2456            reader: XmlReader::new(reader, entity_resolver),
2457
2458            #[cfg(feature = "overlapped-lists")]
2459            read: VecDeque::new(),
2460            #[cfg(feature = "overlapped-lists")]
2461            write: VecDeque::new(),
2462            #[cfg(feature = "overlapped-lists")]
2463            limit: None,
2464
2465            #[cfg(not(feature = "overlapped-lists"))]
2466            peek: None,
2467
2468            key_buf: String::new(),
2469        }
2470    }
2471
2472    /// Returns `true` if all events was consumed.
2473    pub fn is_empty(&self) -> bool {
2474        #[cfg(feature = "overlapped-lists")]
2475        if self.read.is_empty() {
2476            return self.reader.is_empty();
2477        }
2478        #[cfg(not(feature = "overlapped-lists"))]
2479        if self.peek.is_none() {
2480            return self.reader.is_empty();
2481        }
2482        false
2483    }
2484
    /// Returns a reference to the underlying XML reader.
    ///
    /// The example below uses it to inspect the reader positions after
    /// a failed deserialization:
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use serde::Deserialize;
    /// use quick_xml::de::Deserializer;
    /// use quick_xml::NsReader;
    ///
    /// #[derive(Deserialize)]
    /// struct SomeStruct {
    ///     field1: String,
    ///     field2: String,
    /// }
    ///
    /// // Try to deserialize from broken XML
    /// let mut de = Deserializer::from_str(
    ///     "<SomeStruct><field1><field2></SomeStruct>"
    /// //   0                           ^= 28        ^= 41
    /// );
    ///
    /// let err = SomeStruct::deserialize(&mut de);
    /// assert!(err.is_err());
    ///
    /// let reader: &NsReader<_> = de.get_ref().get_ref();
    ///
    /// assert_eq!(reader.error_position(), 28);
    /// assert_eq!(reader.buffer_position(), 41);
    /// ```
    pub const fn get_ref(&self) -> &R {
        // `self.reader` is the intermediate `XmlReader`; its own `reader`
        // field is the source of low-level events supplied by the user
        &self.reader.reader
    }
2516
2517    /// Set the maximum number of events that could be skipped during deserialization
2518    /// of sequences.
2519    ///
2520    /// If `<element>` contains more than specified nested elements, `$text` or
2521    /// CDATA nodes, then [`DeError::TooManyEvents`] will be returned during
2522    /// deserialization of sequence field (any type that uses [`deserialize_seq`]
2523    /// for the deserialization, for example, `Vec<T>`).
2524    ///
2525    /// This method can be used to prevent a [DoS] attack and infinite memory
2526    /// consumption when parsing a very large XML to a sequence field.
2527    ///
2528    /// It is strongly recommended to set limit to some value when you parse data
2529    /// from untrusted sources. You should choose a value that your typical XMLs
2530    /// can have _between_ different elements that corresponds to the same sequence.
2531    ///
2532    /// # Examples
2533    ///
2534    /// Let's imagine, that we deserialize such structure:
2535    /// ```
2536    /// struct List {
2537    ///   item: Vec<()>,
2538    /// }
2539    /// ```
2540    ///
2541    /// The XML that we try to parse look like this:
2542    /// ```xml
2543    /// <any-name>
2544    ///   <item/>
2545    ///   <!-- Bufferization starts at this point -->
2546    ///   <another-item>
2547    ///     <some-element>with text</some-element>
2548    ///     <yet-another-element/>
2549    ///   </another-item>
2550    ///   <!-- Buffer will be emptied at this point; 7 events were buffered -->
2551    ///   <item/>
2552    ///   <!-- There is nothing to buffer, because elements follows each other -->
2553    ///   <item/>
2554    /// </any-name>
2555    /// ```
2556    ///
2557    /// There, when we deserialize the `item` field, we need to buffer 7 events,
2558    /// before we can deserialize the second `<item/>`:
2559    ///
2560    /// - `<another-item>`
2561    /// - `<some-element>`
2562    /// - `$text(with text)`
2563    /// - `</some-element>`
2564    /// - `<yet-another-element/>` (virtual start event)
2565    /// - `<yet-another-element/>` (virtual end event)
2566    /// - `</another-item>`
2567    ///
2568    /// Note, that `<yet-another-element/>` internally represented as 2 events:
2569    /// one for the start tag and one for the end tag. In the future this can be
2570    /// eliminated, but for now we use [auto-expanding feature] of a reader,
2571    /// because this simplifies deserializer code.
2572    ///
2573    /// [`deserialize_seq`]: serde::Deserializer::deserialize_seq
2574    /// [DoS]: https://en.wikipedia.org/wiki/Denial-of-service_attack
2575    /// [auto-expanding feature]: crate::reader::Config::expand_empty_elements
    #[cfg(feature = "overlapped-lists")]
    pub fn event_buffer_size(&mut self, limit: Option<NonZeroUsize>) -> &mut Self {
        // Remember the limit; `skip_event()` checks it before buffering an event
        self.limit = limit;
        // Hand the deserializer back so that calls can be chained
        self
    }
2581
2582    #[cfg(feature = "overlapped-lists")]
2583    fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
2584        if self.read.is_empty() {
2585            self.read.push_front(self.reader.next()?);
2586        }
2587        if let Some(event) = self.read.front() {
2588            return Ok(event);
2589        }
2590        // SAFETY: `self.read` was filled in the code above.
2591        // NOTE: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
2592        // if unsafe code will be allowed
2593        unreachable!()
2594    }
2595    #[cfg(not(feature = "overlapped-lists"))]
2596    fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
2597        if self.peek.is_none() {
2598            self.peek = Some(self.reader.next()?);
2599        }
2600        match self.peek.as_ref() {
2601            Some(v) => Ok(v),
2602            // SAFETY: a `None` variant for `self.peek` would have been replaced
2603            // by a `Some` variant in the code above.
2604            // TODO: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
2605            // if unsafe code will be allowed
2606            None => unreachable!(),
2607        }
2608    }
2609
2610    #[inline]
2611    fn last_peeked(&self) -> &DeEvent<'de> {
2612        #[cfg(feature = "overlapped-lists")]
2613        {
2614            self.read
2615                .front()
2616                .expect("`Deserializer::peek()` should be called")
2617        }
2618        #[cfg(not(feature = "overlapped-lists"))]
2619        {
2620            self.peek
2621                .as_ref()
2622                .expect("`Deserializer::peek()` should be called")
2623        }
2624    }
2625
2626    fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
2627        // Replay skipped or peeked events
2628        #[cfg(feature = "overlapped-lists")]
2629        if let Some(event) = self.read.pop_front() {
2630            return Ok(event);
2631        }
2632        #[cfg(not(feature = "overlapped-lists"))]
2633        if let Some(e) = self.peek.take() {
2634            return Ok(e);
2635        }
2636        self.reader.next()
2637    }
2638
    /// Returns the mark after which all events, skipped by [`Self::skip()`] calls,
    /// should be replayed after calling [`Self::start_replay()`].
    ///
    /// The mark is the current length of the buffer of skipped events.
    #[cfg(feature = "overlapped-lists")]
    #[inline]
    #[must_use = "returned checkpoint should be used in `start_replay`"]
    fn skip_checkpoint(&self) -> usize {
        // Events skipped from now on are stored at this index and after it
        self.write.len()
    }
2647
2648    /// Extracts XML tree of events from and stores them in the skipped events
2649    /// buffer from which they can be retrieved later. You MUST call
2650    /// [`Self::start_replay()`] after calling this to give access to the skipped
2651    /// events and release internal buffers.
2652    #[cfg(feature = "overlapped-lists")]
2653    fn skip(&mut self) -> Result<(), DeError> {
2654        let event = self.next()?;
2655        self.skip_event(event)?;
2656        match self.write.back() {
2657            // Skip all subtree, if we skip a start event
2658            Some(DeEvent::Start(e)) => {
2659                let end = e.name().as_ref().to_owned();
2660                let mut depth = 0;
2661                loop {
2662                    let event = self.next()?;
2663                    match event {
2664                        DeEvent::Start(ref e) if e.name().as_ref() == end => {
2665                            self.skip_event(event)?;
2666                            depth += 1;
2667                        }
2668                        DeEvent::End(ref e) if e.name().as_ref() == end => {
2669                            self.skip_event(event)?;
2670                            if depth == 0 {
2671                                break;
2672                            }
2673                            depth -= 1;
2674                        }
2675                        DeEvent::Eof => {
2676                            self.skip_event(event)?;
2677                            break;
2678                        }
2679                        _ => self.skip_event(event)?,
2680                    }
2681                }
2682            }
2683            _ => (),
2684        }
2685        Ok(())
2686    }
2687
2688    #[cfg(feature = "overlapped-lists")]
2689    #[inline]
2690    fn skip_event(&mut self, event: DeEvent<'de>) -> Result<(), DeError> {
2691        if let Some(max) = self.limit {
2692            if self.write.len() >= max.get() {
2693                return Err(DeError::TooManyEvents(max));
2694            }
2695        }
2696        self.write.push_back(event);
2697        Ok(())
2698    }
2699
2700    /// Moves buffered events, skipped after given `checkpoint` from [`Self::write`]
2701    /// skip buffer to [`Self::read`] buffer.
2702    ///
2703    /// After calling this method, [`Self::peek()`] and [`Self::next()`] starts
2704    /// return events that was skipped previously by calling [`Self::skip()`],
2705    /// and only when all that events will be consumed, the deserializer starts
2706    /// to drain events from underlying reader.
2707    ///
2708    /// This method MUST be called if any number of [`Self::skip()`] was called
2709    /// after [`Self::new()`] or `start_replay()` or you'll lost events.
2710    #[cfg(feature = "overlapped-lists")]
2711    fn start_replay(&mut self, checkpoint: usize) {
2712        if checkpoint == 0 {
2713            self.write.append(&mut self.read);
2714            std::mem::swap(&mut self.read, &mut self.write);
2715        } else {
2716            let mut read = self.write.split_off(checkpoint);
2717            read.append(&mut self.read);
2718            self.read = read;
2719        }
2720    }
2721
    /// Reads a string, allowing it to be wrapped in one level of element tags.
    ///
    /// Shortcut for [`Self::read_string_impl()`] with `allow_start` set to `true`.
    #[inline]
    fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
        self.read_string_impl(true)
    }
2726
    /// Consumes consecutive [`Text`] and [`CData`] (both are referred to below as
    /// _text_) events and merges them into one string. If there are no such
    /// events, returns an empty string.
2730    ///
2731    /// If `allow_start` is `false`, then only text events are consumed, for other
2732    /// events an error is returned (see table below).
2733    ///
2734    /// If `allow_start` is `true`, then two or three events are expected:
2735    /// - [`DeEvent::Start`];
2736    /// - _(optional)_ [`DeEvent::Text`] which content is returned;
2737    /// - [`DeEvent::End`]. If text event was missed, an empty string is returned.
2738    ///
2739    /// Corresponding events are consumed.
2740    ///
2741    /// # Handling events
2742    ///
    /// The table below shows how events are handled by this method:
2744    ///
2745    /// |Event             |XML                        |Handling
2746    /// |------------------|---------------------------|----------------------------------------
2747    /// |[`DeEvent::Start`]|`<tag>...</tag>`           |if `allow_start == true`, result determined by the second table, otherwise emits [`UnexpectedStart("tag")`](DeError::UnexpectedStart)
2748    /// |[`DeEvent::End`]  |`</any-tag>`               |This is impossible situation, the method will panic if it happens
2749    /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged
2750    /// |[`DeEvent::Eof`]  |                           |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
2751    ///
2752    /// Second event, consumed if [`DeEvent::Start`] was received and `allow_start == true`:
2753    ///
2754    /// |Event             |XML                        |Handling
2755    /// |------------------|---------------------------|----------------------------------------------------------------------------------
2756    /// |[`DeEvent::Start`]|`<any-tag>...</any-tag>`   |Emits [`UnexpectedStart("any-tag")`](DeError::UnexpectedStart)
2757    /// |[`DeEvent::End`]  |`</tag>`                   |Returns an empty slice. The reader guarantee that tag will match the open one
2758    /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged, expects the `</tag>` after that
2759    /// |[`DeEvent::Eof`]  |                           |Emits [`InvalidXml(IllFormed(MissingEndTag))`](DeError::InvalidXml)
2760    ///
2761    /// [`Text`]: Event::Text
2762    /// [`CData`]: Event::CData
2763    fn read_string_impl(&mut self, allow_start: bool) -> Result<Cow<'de, str>, DeError> {
2764        match self.next()? {
2765            DeEvent::Text(e) => Ok(e.text),
2766            // allow one nested level
2767            DeEvent::Start(e) if allow_start => self.read_text(e.name()),
2768            DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
2769            // SAFETY: The reader is guaranteed that we don't have unmatched tags
2770            // If we here, then out deserializer has a bug
2771            DeEvent::End(e) => unreachable!("{:?}", e),
2772            DeEvent::Eof => Err(DeError::UnexpectedEof),
2773        }
2774    }
2775    /// Consumes one [`DeEvent::Text`] event and ensures that it is followed by the
2776    /// [`DeEvent::End`] event.
2777    ///
2778    /// # Parameters
2779    /// - `name`: name of a tag opened before reading text. The corresponding end tag
2780    ///   should present in input just after the text
2781    fn read_text(&mut self, name: QName) -> Result<Cow<'de, str>, DeError> {
2782        match self.next()? {
2783            DeEvent::Text(e) => match self.next()? {
2784                // The matching tag name is guaranteed by the reader
2785                DeEvent::End(_) => Ok(e.text),
2786                // SAFETY: Cannot be two consequent Text events, they would be merged into one
2787                DeEvent::Text(_) => unreachable!(),
2788                DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
2789                DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()),
2790            },
2791            // We can get End event in case of `<tag></tag>` or `<tag/>` input
2792            // Return empty text in that case
2793            // The matching tag name is guaranteed by the reader
2794            DeEvent::End(_) => Ok("".into()),
2795            DeEvent::Start(s) => Err(DeError::UnexpectedStart(s.name().as_ref().to_owned())),
2796            DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()),
2797        }
2798    }
2799
2800    /// Drops all events until event with [name](BytesEnd::name()) `name` won't be
2801    /// dropped. This method should be called after [`Self::next()`]
2802    #[cfg(feature = "overlapped-lists")]
2803    fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
2804        let mut depth = 0;
2805        loop {
2806            match self.read.pop_front() {
2807                Some(DeEvent::Start(e)) if e.name() == name => {
2808                    depth += 1;
2809                }
2810                Some(DeEvent::End(e)) if e.name() == name => {
2811                    if depth == 0 {
2812                        break;
2813                    }
2814                    depth -= 1;
2815                }
2816
2817                // Drop all other skipped events
2818                Some(_) => continue,
2819
2820                // If we do not have skipped events, use effective reading that will
2821                // not allocate memory for events
2822                None => {
2823                    // We should close all opened tags, because we could buffer
2824                    // Start events, but not the corresponding End events. So we
2825                    // keep reading events until we exit all nested tags.
2826                    // `read_to_end()` will return an error if an Eof was encountered
2827                    // preliminary (in case of malformed XML).
2828                    //
2829                    // <tag><tag></tag></tag>
2830                    // ^^^^^^^^^^             - buffered in `self.read`, when `self.read_to_end()` is called, depth = 2
2831                    //           ^^^^^^       - read by the first call of `self.reader.read_to_end()`
2832                    //                 ^^^^^^ - read by the second call of `self.reader.read_to_end()`
2833                    loop {
2834                        self.reader.read_to_end(name)?;
2835                        if depth == 0 {
2836                            break;
2837                        }
2838                        depth -= 1;
2839                    }
2840                    break;
2841                }
2842            }
2843        }
2844        Ok(())
2845    }
2846    #[cfg(not(feature = "overlapped-lists"))]
2847    fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
2848        // First one might be in self.peek
2849        match self.next()? {
2850            DeEvent::Start(e) => self.reader.read_to_end(e.name())?,
2851            DeEvent::End(e) if e.name() == name => return Ok(()),
2852            _ => (),
2853        }
2854        self.reader.read_to_end(name)
2855    }
2856
2857    fn skip_next_tree(&mut self) -> Result<(), DeError> {
2858        let DeEvent::Start(start) = self.next()? else {
2859            unreachable!("Only call this if the next event is a start event")
2860        };
2861        let name = start.name();
2862        self.read_to_end(name)
2863    }
2864}
2865
impl<'de> Deserializer<'de, SliceReader<'de>> {
    /// Create new deserializer that will borrow data from the specified string.
    ///
    /// Deserializer created with this method will not resolve custom entities.
    /// It is a shortcut for [`Self::from_str_with_resolver`] called with
    /// [`PredefinedEntityResolver`].
    // The lint suggests implementing `std::str::FromStr`, but that trait has no
    // way to tie the returned value to the `'de` lifetime of `source`
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(source: &'de str) -> Self {
        Self::from_str_with_resolver(source, PredefinedEntityResolver)
    }
}
2875
2876impl<'de, E> Deserializer<'de, SliceReader<'de>, E>
2877where
2878    E: EntityResolver,
2879{
2880    /// Create new deserializer that will borrow data from the specified string
2881    /// and use specified entity resolver.
2882    pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self {
2883        let mut reader = NsReader::from_str(source);
2884        let config = reader.config_mut();
2885        config.expand_empty_elements = true;
2886
2887        Self::new(
2888            SliceReader {
2889                reader,
2890                start_trimmer: StartTrimmer::default(),
2891            },
2892            entity_resolver,
2893        )
2894    }
2895}
2896
impl<'de, R> Deserializer<'de, IoReader<R>>
where
    R: BufRead,
{
    /// Create new deserializer that will copy data from the specified reader
    /// into internal buffer.
    ///
    /// If you already have a string use [`Self::from_str`] instead, because it
    /// will borrow instead of copy. If you have `&[u8]` which is known to represent
    /// UTF-8, you can decode it first before using [`from_str`].
    ///
    /// Deserializer created with this method will not resolve custom entities.
    /// It is a shortcut for [`Self::with_resolver`] called with
    /// [`PredefinedEntityResolver`].
    pub fn from_reader(reader: R) -> Self {
        Self::with_resolver(reader, PredefinedEntityResolver)
    }
}
2913
2914impl<'de, R, E> Deserializer<'de, IoReader<R>, E>
2915where
2916    R: BufRead,
2917    E: EntityResolver,
2918{
2919    /// Create new deserializer that will copy data from the specified reader
2920    /// into internal buffer and use specified entity resolver.
2921    ///
2922    /// If you already have a string use [`Self::from_str`] instead, because it
2923    /// will borrow instead of copy. If you have `&[u8]` which is known to represent
2924    /// UTF-8, you can decode it first before using [`from_str`].
2925    pub fn with_resolver(reader: R, entity_resolver: E) -> Self {
2926        let mut reader = NsReader::from_reader(reader);
2927        let config = reader.config_mut();
2928        config.expand_empty_elements = true;
2929
2930        Self::new(
2931            IoReader {
2932                reader,
2933                start_trimmer: StartTrimmer::default(),
2934                buf: Vec::new(),
2935            },
2936            entity_resolver,
2937        )
2938    }
2939}
2940
2941impl<'de, 'a, R, E> de::Deserializer<'de> for &'a mut Deserializer<'de, R, E>
2942where
2943    R: XmlRead<'de>,
2944    E: EntityResolver,
2945{
2946    type Error = DeError;
2947
2948    deserialize_primitives!();
2949
2950    fn deserialize_struct<V>(
2951        self,
2952        _name: &'static str,
2953        fields: &'static [&'static str],
2954        visitor: V,
2955    ) -> Result<V::Value, DeError>
2956    where
2957        V: Visitor<'de>,
2958    {
2959        match self.next()? {
2960            DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self, e, fields)?),
2961            // SAFETY: The reader is guaranteed that we don't have unmatched tags
2962            // If we here, then out deserializer has a bug
2963            DeEvent::End(e) => unreachable!("{:?}", e),
2964            // Deserializer methods are only hints, if deserializer could not satisfy
2965            // request, it should return the data that it has. It is responsibility
2966            // of a Visitor to return an error if it does not understand the data
2967            DeEvent::Text(e) => match e.text {
2968                Cow::Borrowed(s) => visitor.visit_borrowed_str(s),
2969                Cow::Owned(s) => visitor.visit_string(s),
2970            },
2971            DeEvent::Eof => Err(DeError::UnexpectedEof),
2972        }
2973    }
2974
2975    /// Unit represented in XML as a `xs:element` or text/CDATA content.
2976    /// Any content inside `xs:element` is ignored and skipped.
2977    ///
2978    /// Produces unit struct from any of following inputs:
2979    /// - any `<tag ...>...</tag>`
2980    /// - any `<tag .../>`
2981    /// - any consequent text / CDATA content (can consist of several parts
2982    ///   delimited by comments and processing instructions)
2983    ///
2984    /// # Events handling
2985    ///
2986    /// |Event             |XML                        |Handling
2987    /// |------------------|---------------------------|-------------------------------------------
2988    /// |[`DeEvent::Start`]|`<tag>...</tag>`           |Calls `visitor.visit_unit()`, consumes all events up to and including corresponding `End` event
2989    /// |[`DeEvent::End`]  |`</tag>`                   |This is impossible situation, the method will panic if it happens
2990    /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Calls `visitor.visit_unit()`. The content is ignored
2991    /// |[`DeEvent::Eof`]  |                           |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
2992    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, DeError>
2993    where
2994        V: Visitor<'de>,
2995    {
2996        match self.next()? {
2997            DeEvent::Start(s) => {
2998                self.read_to_end(s.name())?;
2999                visitor.visit_unit()
3000            }
3001            DeEvent::Text(_) => visitor.visit_unit(),
3002            // SAFETY: The reader is guaranteed that we don't have unmatched tags
3003            // If we here, then out deserializer has a bug
3004            DeEvent::End(e) => unreachable!("{:?}", e),
3005            DeEvent::Eof => Err(DeError::UnexpectedEof),
3006        }
3007    }
3008
3009    /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
3010    /// with the same deserializer.
3011    fn deserialize_newtype_struct<V>(
3012        self,
3013        _name: &'static str,
3014        visitor: V,
3015    ) -> Result<V::Value, DeError>
3016    where
3017        V: Visitor<'de>,
3018    {
3019        visitor.visit_newtype_struct(self)
3020    }
3021
3022    fn deserialize_enum<V>(
3023        self,
3024        _name: &'static str,
3025        _variants: &'static [&'static str],
3026        visitor: V,
3027    ) -> Result<V::Value, DeError>
3028    where
3029        V: Visitor<'de>,
3030    {
3031        visitor.visit_enum(var::EnumAccess::new(self))
3032    }
3033
3034    fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, DeError>
3035    where
3036        V: Visitor<'de>,
3037    {
3038        visitor.visit_seq(self)
3039    }
3040
3041    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, DeError>
3042    where
3043        V: Visitor<'de>,
3044    {
3045        // We cannot use result of `peek()` directly because of borrow checker
3046        let _ = self.peek()?;
3047        match self.last_peeked() {
3048            DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
3049            DeEvent::Eof => visitor.visit_none(),
3050            // if the `xsi:nil` attribute is set to true we got a none value
3051            DeEvent::Start(start) if self.reader.reader.has_nil_attr(&start) => {
3052                self.skip_next_tree()?;
3053                visitor.visit_none()
3054            }
3055            _ => visitor.visit_some(self),
3056        }
3057    }
3058
3059    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, DeError>
3060    where
3061        V: Visitor<'de>,
3062    {
3063        match self.peek()? {
3064            DeEvent::Text(_) => self.deserialize_str(visitor),
3065            _ => self.deserialize_map(visitor),
3066        }
3067    }
3068}
3069
3070/// An accessor to sequence elements forming a value for top-level sequence of XML
3071/// elements.
3072///
3073/// Technically, multiple top-level elements violates XML rule of only one top-level
3074/// element, but we consider this as several concatenated XML documents.
3075impl<'de, 'a, R, E> SeqAccess<'de> for &'a mut Deserializer<'de, R, E>
3076where
3077    R: XmlRead<'de>,
3078    E: EntityResolver,
3079{
3080    type Error = DeError;
3081
3082    fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error>
3083    where
3084        T: DeserializeSeed<'de>,
3085    {
3086        match self.peek()? {
3087            DeEvent::Eof => {
3088                // We need to consume event in order to self.is_empty() worked
3089                self.next()?;
3090                Ok(None)
3091            }
3092
3093            // Start(tag), End(tag), Text
3094            _ => seed.deserialize(&mut **self).map(Some),
3095        }
3096    }
3097}
3098
/// A mutable reference to the [`Deserializer`] already is a deserializer, so
/// the conversion returns it unchanged.
impl<'de, 'a, R, E> IntoDeserializer<'de, DeError> for &'a mut Deserializer<'de, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    type Deserializer = Self;

    #[inline]
    fn into_deserializer(self) -> Self {
        self
    }
}
3111
3112////////////////////////////////////////////////////////////////////////////////////////////////////
3113
/// Helper struct that contains the state of an algorithm that converts raw
/// events to semi-trimmed events. The algorithm is independent of the way
/// events are read.
struct StartTrimmer {
    /// If `true`, then leading whitespace will be removed from the next returned
    /// [`Event::Text`]. This field is set to `true` after reading each event
    /// except [`Event::Text`] and [`Event::CData`], so [`Event::Text`] events
    /// read right after them are not trimmed.
    trim_start: bool,
}
3124
3125impl StartTrimmer {
3126    /// Converts raw reader's event into a payload event.
3127    /// Returns `None`, if event should be skipped.
3128    #[inline(always)]
3129    fn trim<'a>(&mut self, event: Event<'a>) -> Option<PayloadEvent<'a>> {
3130        let (event, trim_next_event) = match event {
3131            Event::DocType(e) => (PayloadEvent::DocType(e), true),
3132            Event::Start(e) => (PayloadEvent::Start(e), true),
3133            Event::End(e) => (PayloadEvent::End(e), true),
3134            Event::Eof => (PayloadEvent::Eof, true),
3135
3136            // Do not trim next text event after Text or CDATA event
3137            Event::CData(e) => (PayloadEvent::CData(e), false),
3138            Event::Text(mut e) => {
3139                // If event is empty after trimming, skip it
3140                if self.trim_start && e.inplace_trim_start() {
3141                    return None;
3142                }
3143                (PayloadEvent::Text(e), false)
3144            }
3145
3146            _ => return None,
3147        };
3148        self.trim_start = trim_next_event;
3149        Some(event)
3150    }
3151}
3152
impl Default for StartTrimmer {
    /// Creates a trimmer that will trim the start of the first text event.
    #[inline]
    fn default() -> Self {
        Self { trim_start: true }
    }
}
3159
3160////////////////////////////////////////////////////////////////////////////////////////////////////
3161
/// Trait used by the deserializer for iterating over input. This is manually
/// "specialized" for iterating over `&[u8]`.
///
/// You do not need to implement this trait; it is needed to abstract from
/// [borrowing](SliceReader) and [copying](IoReader) data sources and to reuse
/// code in the deserializer.
pub trait XmlRead<'i> {
    /// Returns the next event, which may borrow from the input.
    fn next(&mut self) -> Result<PayloadEvent<'i>, DeError>;

    /// Skips until the end element with the specified `name` is found. Unlike
    /// `next()` it will not allocate when it cannot satisfy the lifetime.
    fn read_to_end(&mut self, name: QName) -> Result<(), DeError>;

    /// A copy of the reader's decoder used to decode strings.
    fn decoder(&self) -> Decoder;

    /// Checks if the `start` tag has a [`xsi:nil`] attribute. This method ignores
    /// any errors in attributes.
    ///
    /// [`xsi:nil`]: https://www.w3.org/TR/xmlschema-1/#xsi_nil
    fn has_nil_attr(&self, start: &BytesStart) -> bool;
}
3185
/// XML input source that reads from a std::io input stream.
///
/// You cannot create it, it is created automatically when you call
/// [`Deserializer::from_reader`]
pub struct IoReader<R: BufRead> {
    /// Namespace-aware XML reader that wraps the input stream
    reader: NsReader<R>,
    /// State used to trim leading whitespace of text events
    start_trimmer: StartTrimmer,
    /// Scratch buffer that the reader fills while reading events
    buf: Vec<u8>,
}
3195
impl<R: BufRead> IoReader<R> {
    /// Returns the underlying XML reader.
    ///
    /// The example below uses it to ask the reader for positions after a failed
    /// deserialization:
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use serde::Deserialize;
    /// use std::io::Cursor;
    /// use quick_xml::de::Deserializer;
    /// use quick_xml::NsReader;
    ///
    /// #[derive(Deserialize)]
    /// struct SomeStruct {
    ///     field1: String,
    ///     field2: String,
    /// }
    ///
    /// // Try to deserialize from broken XML
    /// let mut de = Deserializer::from_reader(Cursor::new(
    ///     "<SomeStruct><field1><field2></SomeStruct>"
    /// //   0                           ^= 28        ^= 41
    /// ));
    ///
    /// let err = SomeStruct::deserialize(&mut de);
    /// assert!(err.is_err());
    ///
    /// let reader: &NsReader<Cursor<&str>> = de.get_ref().get_ref();
    ///
    /// assert_eq!(reader.error_position(), 28);
    /// assert_eq!(reader.buffer_position(), 41);
    /// ```
    pub const fn get_ref(&self) -> &NsReader<R> {
        &self.reader
    }
}
3230
3231impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
3232    fn next(&mut self) -> Result<PayloadEvent<'static>, DeError> {
3233        loop {
3234            self.buf.clear();
3235
3236            let event = self.reader.read_event_into(&mut self.buf)?;
3237            if let Some(event) = self.start_trimmer.trim(event) {
3238                return Ok(event.into_owned());
3239            }
3240        }
3241    }
3242
3243    fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
3244        match self.reader.read_to_end_into(name, &mut self.buf) {
3245            Err(e) => Err(e.into()),
3246            Ok(_) => Ok(()),
3247        }
3248    }
3249
3250    fn decoder(&self) -> Decoder {
3251        self.reader.decoder()
3252    }
3253
3254    fn has_nil_attr(&self, start: &BytesStart) -> bool {
3255        start.attributes().has_nil(&self.reader)
3256    }
3257}
3258
/// XML input source that reads from a slice of bytes and can borrow from it.
///
/// You cannot create it, it is created automatically when you call
/// [`Deserializer::from_str`].
pub struct SliceReader<'de> {
    /// Namespace-aware XML reader over the borrowed input
    reader: NsReader<&'de [u8]>,
    /// State used to trim leading whitespace of text events
    start_trimmer: StartTrimmer,
}
3267
impl<'de> SliceReader<'de> {
    /// Returns the underlying XML reader.
    ///
    /// The example below uses it to ask the reader for positions after a failed
    /// deserialization:
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use serde::Deserialize;
    /// use quick_xml::de::Deserializer;
    /// use quick_xml::NsReader;
    ///
    /// #[derive(Deserialize)]
    /// struct SomeStruct {
    ///     field1: String,
    ///     field2: String,
    /// }
    ///
    /// // Try to deserialize from broken XML
    /// let mut de = Deserializer::from_str(
    ///     "<SomeStruct><field1><field2></SomeStruct>"
    /// //   0                           ^= 28        ^= 41
    /// );
    ///
    /// let err = SomeStruct::deserialize(&mut de);
    /// assert!(err.is_err());
    ///
    /// let reader: &NsReader<&[u8]> = de.get_ref().get_ref();
    ///
    /// assert_eq!(reader.error_position(), 28);
    /// assert_eq!(reader.buffer_position(), 41);
    /// ```
    pub const fn get_ref(&self) -> &NsReader<&'de [u8]> {
        &self.reader
    }
}
3301
3302impl<'de> XmlRead<'de> for SliceReader<'de> {
3303    fn next(&mut self) -> Result<PayloadEvent<'de>, DeError> {
3304        loop {
3305            let event = self.reader.read_event()?;
3306            if let Some(event) = self.start_trimmer.trim(event) {
3307                return Ok(event);
3308            }
3309        }
3310    }
3311
3312    fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
3313        match self.reader.read_to_end(name) {
3314            Err(e) => Err(e.into()),
3315            Ok(_) => Ok(()),
3316        }
3317    }
3318
3319    fn decoder(&self) -> Decoder {
3320        self.reader.decoder()
3321    }
3322
3323    fn has_nil_attr(&self, start: &BytesStart) -> bool {
3324        start.attributes().has_nil(&self.reader)
3325    }
3326}
3327
3328#[cfg(test)]
3329mod tests {
3330    use super::*;
3331    use crate::errors::IllFormedError;
3332    use pretty_assertions::assert_eq;
3333
3334    fn make_de<'de>(source: &'de str) -> Deserializer<'de, SliceReader<'de>> {
3335        dbg!(source);
3336        Deserializer::from_str(source)
3337    }
3338
3339    #[cfg(feature = "overlapped-lists")]
3340    mod skip {
3341        use super::*;
3342        use crate::de::DeEvent::*;
3343        use crate::events::BytesEnd;
3344        use pretty_assertions::assert_eq;
3345
3346        /// Checks that `peek()` and `read()` behaves correctly after `skip()`
3347        #[test]
3348        fn read_and_peek() {
3349            let mut de = make_de(
3350                r#"
3351                <root>
3352                    <inner>
3353                        text
3354                        <inner/>
3355                    </inner>
3356                    <next/>
3357                    <target/>
3358                </root>
3359                "#,
3360            );
3361
3362            // Initial conditions - both are empty
3363            assert_eq!(de.read, vec![]);
3364            assert_eq!(de.write, vec![]);
3365
3366            assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
3367            assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("inner")));
3368
3369            // Mark that start_replay() should begin replay from this point
3370            let checkpoint = de.skip_checkpoint();
3371            assert_eq!(checkpoint, 0);
3372
3373            // Should skip first <inner> tree
3374            de.skip().unwrap();
3375            assert_eq!(de.read, vec![]);
3376            assert_eq!(
3377                de.write,
3378                vec![
3379                    Start(BytesStart::new("inner")),
3380                    Text("text".into()),
3381                    Start(BytesStart::new("inner")),
3382                    End(BytesEnd::new("inner")),
3383                    End(BytesEnd::new("inner")),
3384                ]
3385            );
3386
3387            // Consume <next/>. Now unconsumed XML looks like:
3388            //
3389            //   <inner>
3390            //     text
3391            //     <inner/>
3392            //   </inner>
3393            //   <target/>
3394            // </root>
3395            assert_eq!(de.next().unwrap(), Start(BytesStart::new("next")));
3396            assert_eq!(de.next().unwrap(), End(BytesEnd::new("next")));
3397
3398            // We finish writing. Next call to `next()` should start replay that messages:
3399            //
3400            //   <inner>
3401            //     text
3402            //     <inner/>
3403            //   </inner>
3404            //
3405            // and after that stream that messages:
3406            //
3407            //   <target/>
3408            // </root>
3409            de.start_replay(checkpoint);
3410            assert_eq!(
3411                de.read,
3412                vec![
3413                    Start(BytesStart::new("inner")),
3414                    Text("text".into()),
3415                    Start(BytesStart::new("inner")),
3416                    End(BytesEnd::new("inner")),
3417                    End(BytesEnd::new("inner")),
3418                ]
3419            );
3420            assert_eq!(de.write, vec![]);
3421            assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
3422
3423            // Mark that start_replay() should begin replay from this point
3424            let checkpoint = de.skip_checkpoint();
3425            assert_eq!(checkpoint, 0);
3426
3427            // Skip `$text` node and consume <inner/> after it
3428            de.skip().unwrap();
3429            assert_eq!(
3430                de.read,
3431                vec![
3432                    Start(BytesStart::new("inner")),
3433                    End(BytesEnd::new("inner")),
3434                    End(BytesEnd::new("inner")),
3435                ]
3436            );
3437            assert_eq!(
3438                de.write,
3439                vec![
3440                    // This comment here to keep the same formatting of both arrays
3441                    // otherwise rustfmt suggest one-line it
3442                    Text("text".into()),
3443                ]
3444            );
3445
3446            assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
3447            assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
3448
3449            // We finish writing. Next call to `next()` should start replay messages:
3450            //
3451            //     text
3452            //   </inner>
3453            //
3454            // and after that stream that messages:
3455            //
3456            //   <target/>
3457            // </root>
3458            de.start_replay(checkpoint);
3459            assert_eq!(
3460                de.read,
3461                vec![
3462                    // This comment here to keep the same formatting as others
3463                    // otherwise rustfmt suggest one-line it
3464                    Text("text".into()),
3465                    End(BytesEnd::new("inner")),
3466                ]
3467            );
3468            assert_eq!(de.write, vec![]);
3469            assert_eq!(de.next().unwrap(), Text("text".into()));
3470            assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
3471            assert_eq!(de.next().unwrap(), Start(BytesStart::new("target")));
3472            assert_eq!(de.next().unwrap(), End(BytesEnd::new("target")));
3473            assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
3474            assert_eq!(de.next().unwrap(), Eof);
3475        }
3476
3477        /// Checks that `read_to_end()` behaves correctly after `skip()`
3478        #[test]
3479        fn read_to_end() {
3480            let mut de = make_de(
3481                r#"
3482                <root>
3483                    <skip>
3484                        text
3485                        <skip/>
3486                    </skip>
3487                    <target>
3488                        <target/>
3489                    </target>
3490                </root>
3491                "#,
3492            );
3493
3494            // Initial conditions - both are empty
3495            assert_eq!(de.read, vec![]);
3496            assert_eq!(de.write, vec![]);
3497
3498            assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
3499
3500            // Mark that start_replay() should begin replay from this point
3501            let checkpoint = de.skip_checkpoint();
3502            assert_eq!(checkpoint, 0);
3503
3504            // Skip the <skip> tree
3505            de.skip().unwrap();
3506            assert_eq!(de.read, vec![]);
3507            assert_eq!(
3508                de.write,
3509                vec![
3510                    Start(BytesStart::new("skip")),
3511                    Text("text".into()),
3512                    Start(BytesStart::new("skip")),
3513                    End(BytesEnd::new("skip")),
3514                    End(BytesEnd::new("skip")),
3515                ]
3516            );
3517
3518            // Drop all events that represents <target> tree. Now unconsumed XML looks like:
3519            //
3520            //   <skip>
3521            //     text
3522            //     <skip/>
3523            //   </skip>
3524            // </root>
3525            assert_eq!(de.next().unwrap(), Start(BytesStart::new("target")));
3526            de.read_to_end(QName(b"target")).unwrap();
3527            assert_eq!(de.read, vec![]);
3528            assert_eq!(
3529                de.write,
3530                vec![
3531                    Start(BytesStart::new("skip")),
3532                    Text("text".into()),
3533                    Start(BytesStart::new("skip")),
3534                    End(BytesEnd::new("skip")),
3535                    End(BytesEnd::new("skip")),
3536                ]
3537            );
3538
3539            // We finish writing. Next call to `next()` should start replay that messages:
3540            //
3541            //   <skip>
3542            //     text
3543            //     <skip/>
3544            //   </skip>
3545            //
3546            // and after that stream that messages:
3547            //
3548            // </root>
3549            de.start_replay(checkpoint);
3550            assert_eq!(
3551                de.read,
3552                vec![
3553                    Start(BytesStart::new("skip")),
3554                    Text("text".into()),
3555                    Start(BytesStart::new("skip")),
3556                    End(BytesEnd::new("skip")),
3557                    End(BytesEnd::new("skip")),
3558                ]
3559            );
3560            assert_eq!(de.write, vec![]);
3561
3562            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skip")));
3563            de.read_to_end(QName(b"skip")).unwrap();
3564
3565            assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
3566            assert_eq!(de.next().unwrap(), Eof);
3567        }
3568
3569        /// Checks that replay replayes only part of events
3570        /// Test for https://github.com/tafia/quick-xml/issues/435
3571        #[test]
3572        fn partial_replay() {
3573            let mut de = make_de(
3574                r#"
3575                <root>
3576                    <skipped-1/>
3577                    <skipped-2/>
3578                    <inner>
3579                        <skipped-3/>
3580                        <skipped-4/>
3581                        <target-2/>
3582                    </inner>
3583                    <target-1/>
3584                </root>
3585                "#,
3586            );
3587
3588            // Initial conditions - both are empty
3589            assert_eq!(de.read, vec![]);
3590            assert_eq!(de.write, vec![]);
3591
3592            assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
3593
3594            // start_replay() should start replay from this point
3595            let checkpoint1 = de.skip_checkpoint();
3596            assert_eq!(checkpoint1, 0);
3597
3598            // Should skip first and second <skipped-N/> elements
3599            de.skip().unwrap(); // skipped-1
3600            de.skip().unwrap(); // skipped-2
3601            assert_eq!(de.read, vec![]);
3602            assert_eq!(
3603                de.write,
3604                vec![
3605                    Start(BytesStart::new("skipped-1")),
3606                    End(BytesEnd::new("skipped-1")),
3607                    Start(BytesStart::new("skipped-2")),
3608                    End(BytesEnd::new("skipped-2")),
3609                ]
3610            );
3611
3612            ////////////////////////////////////////////////////////////////////////////////////////
3613
3614            assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
3615            assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("skipped-3")));
3616            assert_eq!(
3617                de.read,
3618                vec![
3619                    // This comment here to keep the same formatting of both arrays
3620                    // otherwise rustfmt suggest one-line it
3621                    Start(BytesStart::new("skipped-3")),
3622                ]
3623            );
3624            assert_eq!(
3625                de.write,
3626                vec![
3627                    Start(BytesStart::new("skipped-1")),
3628                    End(BytesEnd::new("skipped-1")),
3629                    Start(BytesStart::new("skipped-2")),
3630                    End(BytesEnd::new("skipped-2")),
3631                ]
3632            );
3633
3634            // start_replay() should start replay from this point
3635            let checkpoint2 = de.skip_checkpoint();
3636            assert_eq!(checkpoint2, 4);
3637
3638            // Should skip third and forth <skipped-N/> elements
3639            de.skip().unwrap(); // skipped-3
3640            de.skip().unwrap(); // skipped-4
3641            assert_eq!(de.read, vec![]);
3642            assert_eq!(
3643                de.write,
3644                vec![
3645                    // checkpoint 1
3646                    Start(BytesStart::new("skipped-1")),
3647                    End(BytesEnd::new("skipped-1")),
3648                    Start(BytesStart::new("skipped-2")),
3649                    End(BytesEnd::new("skipped-2")),
3650                    // checkpoint 2
3651                    Start(BytesStart::new("skipped-3")),
3652                    End(BytesEnd::new("skipped-3")),
3653                    Start(BytesStart::new("skipped-4")),
3654                    End(BytesEnd::new("skipped-4")),
3655                ]
3656            );
3657            assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-2")));
3658            assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-2")));
3659            assert_eq!(de.peek().unwrap(), &End(BytesEnd::new("inner")));
3660            assert_eq!(
3661                de.read,
3662                vec![
3663                    // This comment here to keep the same formatting of both arrays
3664                    // otherwise rustfmt suggest one-line it
3665                    End(BytesEnd::new("inner")),
3666                ]
3667            );
3668            assert_eq!(
3669                de.write,
3670                vec![
3671                    // checkpoint 1
3672                    Start(BytesStart::new("skipped-1")),
3673                    End(BytesEnd::new("skipped-1")),
3674                    Start(BytesStart::new("skipped-2")),
3675                    End(BytesEnd::new("skipped-2")),
3676                    // checkpoint 2
3677                    Start(BytesStart::new("skipped-3")),
3678                    End(BytesEnd::new("skipped-3")),
3679                    Start(BytesStart::new("skipped-4")),
3680                    End(BytesEnd::new("skipped-4")),
3681                ]
3682            );
3683
3684            // Start replay events from checkpoint 2
3685            de.start_replay(checkpoint2);
3686            assert_eq!(
3687                de.read,
3688                vec![
3689                    Start(BytesStart::new("skipped-3")),
3690                    End(BytesEnd::new("skipped-3")),
3691                    Start(BytesStart::new("skipped-4")),
3692                    End(BytesEnd::new("skipped-4")),
3693                    End(BytesEnd::new("inner")),
3694                ]
3695            );
3696            assert_eq!(
3697                de.write,
3698                vec![
3699                    Start(BytesStart::new("skipped-1")),
3700                    End(BytesEnd::new("skipped-1")),
3701                    Start(BytesStart::new("skipped-2")),
3702                    End(BytesEnd::new("skipped-2")),
3703                ]
3704            );
3705
3706            // Replayed events
3707            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-3")));
3708            assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-3")));
3709            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-4")));
3710            assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-4")));
3711
3712            assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
3713            assert_eq!(de.read, vec![]);
3714            assert_eq!(
3715                de.write,
3716                vec![
3717                    Start(BytesStart::new("skipped-1")),
3718                    End(BytesEnd::new("skipped-1")),
3719                    Start(BytesStart::new("skipped-2")),
3720                    End(BytesEnd::new("skipped-2")),
3721                ]
3722            );
3723
3724            ////////////////////////////////////////////////////////////////////////////////////////
3725
3726            // New events
3727            assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-1")));
3728            assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-1")));
3729
3730            assert_eq!(de.read, vec![]);
3731            assert_eq!(
3732                de.write,
3733                vec![
3734                    Start(BytesStart::new("skipped-1")),
3735                    End(BytesEnd::new("skipped-1")),
3736                    Start(BytesStart::new("skipped-2")),
3737                    End(BytesEnd::new("skipped-2")),
3738                ]
3739            );
3740
3741            // Start replay events from checkpoint 1
3742            de.start_replay(checkpoint1);
3743            assert_eq!(
3744                de.read,
3745                vec![
3746                    Start(BytesStart::new("skipped-1")),
3747                    End(BytesEnd::new("skipped-1")),
3748                    Start(BytesStart::new("skipped-2")),
3749                    End(BytesEnd::new("skipped-2")),
3750                ]
3751            );
3752            assert_eq!(de.write, vec![]);
3753
3754            // Replayed events
3755            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-1")));
3756            assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-1")));
3757            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-2")));
3758            assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-2")));
3759
3760            assert_eq!(de.read, vec![]);
3761            assert_eq!(de.write, vec![]);
3762
3763            // New events
3764            assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
3765            assert_eq!(de.next().unwrap(), Eof);
3766        }
3767
3768        /// Checks that limiting buffer size works correctly
3769        #[test]
3770        fn limit() {
3771            use serde::Deserialize;
3772
3773            #[derive(Debug, Deserialize)]
3774            #[allow(unused)]
3775            struct List {
3776                item: Vec<()>,
3777            }
3778
3779            let mut de = make_de(
3780                r#"
3781                <any-name>
3782                    <item/>
3783                    <another-item>
3784                        <some-element>with text</some-element>
3785                        <yet-another-element/>
3786                    </another-item>
3787                    <item/>
3788                    <item/>
3789                </any-name>
3790                "#,
3791            );
3792            de.event_buffer_size(NonZeroUsize::new(3));
3793
3794            match List::deserialize(&mut de) {
3795                Err(DeError::TooManyEvents(count)) => assert_eq!(count.get(), 3),
3796                e => panic!("Expected `Err(TooManyEvents(3))`, but got `{:?}`", e),
3797            }
3798        }
3799
3800        /// Without handling Eof in `skip` this test failed with memory allocation
3801        #[test]
3802        fn invalid_xml() {
3803            use crate::de::DeEvent::*;
3804
3805            let mut de = make_de("<root>");
3806
3807            // Cache all events
3808            let checkpoint = de.skip_checkpoint();
3809            de.skip().unwrap();
3810            de.start_replay(checkpoint);
3811            assert_eq!(de.read, vec![Start(BytesStart::new("root")), Eof]);
3812        }
3813    }
3814
3815    mod read_to_end {
3816        use super::*;
3817        use crate::de::DeEvent::*;
3818        use pretty_assertions::assert_eq;
3819
3820        #[test]
3821        fn complex() {
3822            let mut de = make_de(
3823                r#"
3824                <root>
3825                    <tag a="1"><tag>text</tag>content</tag>
3826                    <tag a="2"><![CDATA[cdata content]]></tag>
3827                    <self-closed/>
3828                </root>
3829                "#,
3830            );
3831
3832            assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
3833
3834            assert_eq!(
3835                de.next().unwrap(),
3836                Start(BytesStart::from_content(r#"tag a="1""#, 3))
3837            );
3838            assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ());
3839
3840            assert_eq!(
3841                de.next().unwrap(),
3842                Start(BytesStart::from_content(r#"tag a="2""#, 3))
3843            );
3844            assert_eq!(de.next().unwrap(), Text("cdata content".into()));
3845            assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag")));
3846
3847            assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed")));
3848            assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ());
3849
3850            assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
3851            assert_eq!(de.next().unwrap(), Eof);
3852        }
3853
3854        #[test]
3855        fn invalid_xml1() {
3856            let mut de = make_de("<tag><tag></tag>");
3857
3858            assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag")));
3859            assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("tag")));
3860
3861            match de.read_to_end(QName(b"tag")) {
3862                Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
3863                    assert_eq!(cause, IllFormedError::MissingEndTag("tag".into()))
3864                }
3865                x => panic!(
3866                    "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
3867                    x
3868                ),
3869            }
3870            assert_eq!(de.next().unwrap(), Eof);
3871        }
3872
3873        #[test]
3874        fn invalid_xml2() {
3875            let mut de = make_de("<tag><![CDATA[]]><tag></tag>");
3876
3877            assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag")));
3878            assert_eq!(de.peek().unwrap(), &Text("".into()));
3879
3880            match de.read_to_end(QName(b"tag")) {
3881                Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
3882                    assert_eq!(cause, IllFormedError::MissingEndTag("tag".into()))
3883                }
3884                x => panic!(
3885                    "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
3886                    x
3887                ),
3888            }
3889            assert_eq!(de.next().unwrap(), Eof);
3890        }
3891    }
3892
3893    #[test]
3894    fn borrowing_reader_parity() {
3895        let s = r#"
3896            <item name="hello" source="world.rs">Some text</item>
3897            <item2/>
3898            <item3 value="world" />
3899        "#;
3900
3901        let mut reader1 = IoReader {
3902            reader: NsReader::from_reader(s.as_bytes()),
3903            start_trimmer: StartTrimmer::default(),
3904            buf: Vec::new(),
3905        };
3906        let mut reader2 = SliceReader {
3907            reader: NsReader::from_str(s),
3908            start_trimmer: StartTrimmer::default(),
3909        };
3910
3911        loop {
3912            let event1 = reader1.next().unwrap();
3913            let event2 = reader2.next().unwrap();
3914
3915            if let (PayloadEvent::Eof, PayloadEvent::Eof) = (&event1, &event2) {
3916                break;
3917            }
3918
3919            assert_eq!(event1, event2);
3920        }
3921    }
3922
3923    #[test]
3924    fn borrowing_reader_events() {
3925        let s = r#"
3926            <item name="hello" source="world.rs">Some text</item>
3927            <item2></item2>
3928            <item3/>
3929            <item4 value="world" />
3930        "#;
3931
3932        let mut reader = SliceReader {
3933            reader: NsReader::from_str(s),
3934            start_trimmer: StartTrimmer::default(),
3935        };
3936
3937        let config = reader.reader.config_mut();
3938        config.expand_empty_elements = true;
3939
3940        let mut events = Vec::new();
3941
3942        loop {
3943            let event = reader.next().unwrap();
3944            if let PayloadEvent::Eof = event {
3945                break;
3946            }
3947            events.push(event);
3948        }
3949
3950        use crate::de::PayloadEvent::*;
3951
3952        assert_eq!(
3953            events,
3954            vec![
3955                Start(BytesStart::from_content(
3956                    r#"item name="hello" source="world.rs""#,
3957                    4
3958                )),
3959                Text(BytesText::from_escaped("Some text")),
3960                End(BytesEnd::new("item")),
3961                Start(BytesStart::from_content("item2", 5)),
3962                End(BytesEnd::new("item2")),
3963                Start(BytesStart::from_content("item3", 5)),
3964                End(BytesEnd::new("item3")),
3965                Start(BytesStart::from_content(r#"item4 value="world" "#, 5)),
3966                End(BytesEnd::new("item4")),
3967            ]
3968        )
3969    }
3970
3971    /// Ensures, that [`Deserializer::read_string()`] never can get an `End` event,
3972    /// because parser reports error early
3973    #[test]
3974    fn read_string() {
3975        match from_str::<String>(r#"</root>"#) {
3976            Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
3977                assert_eq!(cause, IllFormedError::UnmatchedEndTag("root".into()));
3978            }
3979            x => panic!(
3980                "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
3981                x
3982            ),
3983        }
3984
3985        let s: String = from_str(r#"<root></root>"#).unwrap();
3986        assert_eq!(s, "");
3987
3988        match from_str::<String>(r#"<root></other>"#) {
3989            Err(DeError::InvalidXml(Error::IllFormed(cause))) => assert_eq!(
3990                cause,
3991                IllFormedError::MismatchedEndTag {
3992                    expected: "root".into(),
3993                    found: "other".into(),
3994                }
3995            ),
3996            x => panic!("Expected `Err(InvalidXml(IllFormed(_))`, but got `{:?}`", x),
3997        }
3998    }
3999
4000    /// Tests for https://github.com/tafia/quick-xml/issues/474.
4001    ///
4002    /// That tests ensures that comments and processed instructions is ignored
4003    /// and can split one logical string in pieces.
4004    mod merge_text {
4005        use super::*;
4006        use pretty_assertions::assert_eq;
4007
4008        #[test]
4009        fn text() {
4010            let mut de = make_de("text");
4011            assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4012        }
4013
4014        #[test]
4015        fn cdata() {
4016            let mut de = make_de("<![CDATA[cdata]]>");
4017            assert_eq!(de.next().unwrap(), DeEvent::Text("cdata".into()));
4018        }
4019
4020        #[test]
4021        fn text_and_cdata() {
4022            let mut de = make_de("text and <![CDATA[cdata]]>");
4023            assert_eq!(de.next().unwrap(), DeEvent::Text("text and cdata".into()));
4024        }
4025
4026        #[test]
4027        fn text_and_empty_cdata() {
4028            let mut de = make_de("text and <![CDATA[]]>");
4029            assert_eq!(de.next().unwrap(), DeEvent::Text("text and ".into()));
4030        }
4031
4032        #[test]
4033        fn cdata_and_text() {
4034            let mut de = make_de("<![CDATA[cdata]]> and text");
4035            assert_eq!(de.next().unwrap(), DeEvent::Text("cdata and text".into()));
4036        }
4037
4038        #[test]
4039        fn empty_cdata_and_text() {
4040            let mut de = make_de("<![CDATA[]]> and text");
4041            assert_eq!(de.next().unwrap(), DeEvent::Text(" and text".into()));
4042        }
4043
4044        #[test]
4045        fn cdata_and_cdata() {
4046            let mut de = make_de(
4047                "\
4048                    <![CDATA[cdata]]]]>\
4049                    <![CDATA[>cdata]]>\
4050                ",
4051            );
4052            assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4053        }
4054
4055        mod comment_between {
4056            use super::*;
4057            use pretty_assertions::assert_eq;
4058
4059            #[test]
4060            fn text() {
4061                let mut de = make_de(
4062                    "\
4063                        text \
4064                        <!--comment 1--><!--comment 2--> \
4065                        text\
4066                    ",
4067                );
4068                assert_eq!(de.next().unwrap(), DeEvent::Text("text  text".into()));
4069            }
4070
4071            #[test]
4072            fn cdata() {
4073                let mut de = make_de(
4074                    "\
4075                        <![CDATA[cdata]]]]>\
4076                        <!--comment 1--><!--comment 2-->\
4077                        <![CDATA[>cdata]]>\
4078                    ",
4079                );
4080                assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4081            }
4082
4083            #[test]
4084            fn text_and_cdata() {
4085                let mut de = make_de(
4086                    "\
4087                        text \
4088                        <!--comment 1--><!--comment 2-->\
4089                        <![CDATA[ cdata]]>\
4090                    ",
4091                );
4092                assert_eq!(de.next().unwrap(), DeEvent::Text("text  cdata".into()));
4093            }
4094
4095            #[test]
4096            fn text_and_empty_cdata() {
4097                let mut de = make_de(
4098                    "\
4099                        text \
4100                        <!--comment 1--><!--comment 2-->\
4101                        <![CDATA[]]>\
4102                    ",
4103                );
4104                assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into()));
4105            }
4106
4107            #[test]
4108            fn cdata_and_text() {
4109                let mut de = make_de(
4110                    "\
4111                        <![CDATA[cdata ]]>\
4112                        <!--comment 1--><!--comment 2--> \
4113                        text \
4114                    ",
4115                );
4116                assert_eq!(de.next().unwrap(), DeEvent::Text("cdata  text".into()));
4117            }
4118
4119            #[test]
4120            fn empty_cdata_and_text() {
4121                let mut de = make_de(
4122                    "\
4123                        <![CDATA[]]>\
4124                        <!--comment 1--><!--comment 2--> \
4125                        text \
4126                    ",
4127                );
4128                assert_eq!(de.next().unwrap(), DeEvent::Text(" text".into()));
4129            }
4130
4131            #[test]
4132            fn cdata_and_cdata() {
4133                let mut de = make_de(
4134                    "\
4135                        <![CDATA[cdata]]]>\
4136                        <!--comment 1--><!--comment 2-->\
4137                        <![CDATA[]>cdata]]>\
4138                    ",
4139                );
4140                assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4141            }
4142        }
4143
4144        mod pi_between {
4145            use super::*;
4146            use pretty_assertions::assert_eq;
4147
4148            #[test]
4149            fn text() {
4150                let mut de = make_de(
4151                    "\
4152                        text \
4153                        <?pi 1?><?pi 2?> \
4154                        text\
4155                    ",
4156                );
4157                assert_eq!(de.next().unwrap(), DeEvent::Text("text  text".into()));
4158            }
4159
4160            #[test]
4161            fn cdata() {
4162                let mut de = make_de(
4163                    "\
4164                        <![CDATA[cdata]]]]>\
4165                        <?pi 1?><?pi 2?>\
4166                        <![CDATA[>cdata]]>\
4167                    ",
4168                );
4169                assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4170            }
4171
4172            #[test]
4173            fn text_and_cdata() {
4174                let mut de = make_de(
4175                    "\
4176                        text \
4177                        <?pi 1?><?pi 2?>\
4178                        <![CDATA[ cdata]]>\
4179                    ",
4180                );
4181                assert_eq!(de.next().unwrap(), DeEvent::Text("text  cdata".into()));
4182            }
4183
4184            #[test]
4185            fn text_and_empty_cdata() {
4186                let mut de = make_de(
4187                    "\
4188                        text \
4189                        <?pi 1?><?pi 2?>\
4190                        <![CDATA[]]>\
4191                    ",
4192                );
4193                assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into()));
4194            }
4195
4196            #[test]
4197            fn cdata_and_text() {
4198                let mut de = make_de(
4199                    "\
4200                        <![CDATA[cdata ]]>\
4201                        <?pi 1?><?pi 2?> \
4202                        text \
4203                    ",
4204                );
4205                assert_eq!(de.next().unwrap(), DeEvent::Text("cdata  text".into()));
4206            }
4207
4208            #[test]
4209            fn empty_cdata_and_text() {
4210                let mut de = make_de(
4211                    "\
4212                        <![CDATA[]]>\
4213                        <?pi 1?><?pi 2?> \
4214                        text \
4215                    ",
4216                );
4217                assert_eq!(de.next().unwrap(), DeEvent::Text(" text".into()));
4218            }
4219
4220            #[test]
4221            fn cdata_and_cdata() {
4222                let mut de = make_de(
4223                    "\
4224                        <![CDATA[cdata]]]>\
4225                        <?pi 1?><?pi 2?>\
4226                        <![CDATA[]>cdata]]>\
4227                    ",
4228                );
4229                assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4230            }
4231        }
4232    }
4233
4234    /// Tests for https://github.com/tafia/quick-xml/issues/474.
4235    ///
    /// These tests ensure that any combination of payload data is processed
    /// as expected.
4238    mod triples {
4239        use super::*;
4240        use pretty_assertions::assert_eq;
4241
4242        mod start {
4243            use super::*;
4244
4245            /// <tag1><tag2>...
4246            mod start {
4247                use super::*;
4248                use pretty_assertions::assert_eq;
4249
4250                #[test]
4251                fn start() {
4252                    let mut de = make_de("<tag1><tag2><tag3>");
4253                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4254                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4255                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag3")));
4256                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4257                }
4258
4259                /// Not matching end tag will result to error
4260                #[test]
4261                fn end() {
4262                    let mut de = make_de("<tag1><tag2></tag2>");
4263                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4264                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4265                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag2")));
4266                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4267                }
4268
4269                #[test]
4270                fn text() {
4271                    let mut de = make_de("<tag1><tag2> text ");
4272                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4273                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4274                    // Text is trimmed from both sides
4275                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4276                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4277                }
4278
4279                #[test]
4280                fn cdata() {
4281                    let mut de = make_de("<tag1><tag2><![CDATA[ cdata ]]>");
4282                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4283                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4284                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4285                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4286                }
4287
4288                #[test]
4289                fn eof() {
4290                    let mut de = make_de("<tag1><tag2>");
4291                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4292                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4293                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4294                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4295                }
4296            }
4297
4298            /// <tag></tag>...
4299            mod end {
4300                use super::*;
4301                use pretty_assertions::assert_eq;
4302
4303                #[test]
4304                fn start() {
4305                    let mut de = make_de("<tag></tag><tag2>");
4306                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4307                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4308                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4309                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4310                }
4311
4312                #[test]
4313                fn end() {
4314                    let mut de = make_de("<tag></tag></tag2>");
4315                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4316                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4317                    match de.next() {
4318                        Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4319                            assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag2".into()));
4320                        }
4321                        x => panic!(
4322                            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4323                            x
4324                        ),
4325                    }
4326                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4327                }
4328
4329                #[test]
4330                fn text() {
4331                    let mut de = make_de("<tag></tag> text ");
4332                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4333                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4334                    // Text is trimmed from both sides
4335                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4336                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4337                }
4338
4339                #[test]
4340                fn cdata() {
4341                    let mut de = make_de("<tag></tag><![CDATA[ cdata ]]>");
4342                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4343                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4344                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4345                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4346                }
4347
4348                #[test]
4349                fn eof() {
4350                    let mut de = make_de("<tag></tag>");
4351                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4352                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4353                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4354                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4355                }
4356            }
4357
4358            /// <tag> text ...
4359            mod text {
4360                use super::*;
4361                use pretty_assertions::assert_eq;
4362
4363                #[test]
4364                fn start() {
4365                    let mut de = make_de("<tag> text <tag2>");
4366                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4367                    // Text is trimmed from both sides
4368                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4369                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4370                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4371                }
4372
4373                #[test]
4374                fn end() {
4375                    let mut de = make_de("<tag> text </tag>");
4376                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4377                    // Text is trimmed from both sides
4378                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4379                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4380                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4381                }
4382
4383                // start::text::text has no difference from start::text
4384
4385                #[test]
4386                fn cdata() {
4387                    let mut de = make_de("<tag> text <![CDATA[ cdata ]]>");
4388                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4389                    // Text is trimmed from the start
4390                    assert_eq!(de.next().unwrap(), DeEvent::Text("text  cdata ".into()));
4391                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4392                }
4393
4394                #[test]
4395                fn eof() {
4396                    let mut de = make_de("<tag> text ");
4397                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4398                    // Text is trimmed from both sides
4399                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4400                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4401                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4402                }
4403            }
4404
4405            /// <tag><![CDATA[ cdata ]]>...
4406            mod cdata {
4407                use super::*;
4408                use pretty_assertions::assert_eq;
4409
4410                #[test]
4411                fn start() {
4412                    let mut de = make_de("<tag><![CDATA[ cdata ]]><tag2>");
4413                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4414                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4415                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4416                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4417                }
4418
4419                #[test]
4420                fn end() {
4421                    let mut de = make_de("<tag><![CDATA[ cdata ]]></tag>");
4422                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4423                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4424                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4425                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4426                }
4427
4428                #[test]
4429                fn text() {
4430                    let mut de = make_de("<tag><![CDATA[ cdata ]]> text ");
4431                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4432                    // Text is trimmed from the end
4433                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  text".into()));
4434                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4435                }
4436
4437                #[test]
4438                fn cdata() {
4439                    let mut de = make_de("<tag><![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4440                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4441                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  cdata2 ".into()));
4442                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4443                }
4444
4445                #[test]
4446                fn eof() {
4447                    let mut de = make_de("<tag><![CDATA[ cdata ]]>");
4448                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4449                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4450                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4451                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4452                }
4453            }
4454        }
4455
4456        /// Start from End event will always generate an error
4457        #[test]
4458        fn end() {
4459            let mut de = make_de("</tag>");
4460            match de.next() {
4461                Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4462                    assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4463                }
4464                x => panic!(
4465                    "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4466                    x
4467                ),
4468            }
4469            assert_eq!(de.next().unwrap(), DeEvent::Eof);
4470        }
4471
4472        mod text {
4473            use super::*;
4474            use pretty_assertions::assert_eq;
4475
4476            mod start {
4477                use super::*;
4478                use pretty_assertions::assert_eq;
4479
4480                #[test]
4481                fn start() {
4482                    let mut de = make_de(" text <tag1><tag2>");
4483                    // Text is trimmed from both sides
4484                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4485                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4486                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4487                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4488                }
4489
4490                /// Not matching end tag will result in error
4491                #[test]
4492                fn end() {
4493                    let mut de = make_de(" text <tag></tag>");
4494                    // Text is trimmed from both sides
4495                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4496                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4497                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4498                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4499                }
4500
4501                #[test]
4502                fn text() {
4503                    let mut de = make_de(" text <tag> text2 ");
4504                    // Text is trimmed from both sides
4505                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4506                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4507                    // Text is trimmed from both sides
4508                    assert_eq!(de.next().unwrap(), DeEvent::Text("text2".into()));
4509                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4510                }
4511
4512                #[test]
4513                fn cdata() {
4514                    let mut de = make_de(" text <tag><![CDATA[ cdata ]]>");
4515                    // Text is trimmed from both sides
4516                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4517                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4518                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4519                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4520                }
4521
4522                #[test]
4523                fn eof() {
4524                    // Text is trimmed from both sides
4525                    let mut de = make_de(" text <tag>");
4526                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4527                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4528                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4529                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4530                }
4531            }
4532
4533            /// End event without corresponding start event will always generate an error
4534            #[test]
4535            fn end() {
4536                let mut de = make_de(" text </tag>");
4537                // Text is trimmed from both sides
4538                assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4539                match de.next() {
4540                    Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4541                        assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4542                    }
4543                    x => panic!(
4544                        "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4545                        x
4546                    ),
4547                }
4548                assert_eq!(de.next().unwrap(), DeEvent::Eof);
4549            }
4550
4551            // text::text::something is equivalent to text::something
4552
4553            mod cdata {
4554                use super::*;
4555                use pretty_assertions::assert_eq;
4556
4557                #[test]
4558                fn start() {
4559                    let mut de = make_de(" text <![CDATA[ cdata ]]><tag>");
4560                    // Text is trimmed from the start
4561                    assert_eq!(de.next().unwrap(), DeEvent::Text("text  cdata ".into()));
4562                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4563                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4564                }
4565
4566                #[test]
4567                fn end() {
4568                    let mut de = make_de(" text <![CDATA[ cdata ]]></tag>");
4569                    // Text is trimmed from the start
4570                    assert_eq!(de.next().unwrap(), DeEvent::Text("text  cdata ".into()));
4571                    match de.next() {
4572                        Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4573                            assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4574                        }
4575                        x => panic!(
4576                            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4577                            x
4578                        ),
4579                    }
4580                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4581                }
4582
4583                #[test]
4584                fn text() {
4585                    let mut de = make_de(" text <![CDATA[ cdata ]]> text2 ");
4586                    // Text is trimmed from the start and from the end
4587                    assert_eq!(
4588                        de.next().unwrap(),
4589                        DeEvent::Text("text  cdata  text2".into())
4590                    );
4591                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4592                }
4593
4594                #[test]
4595                fn cdata() {
4596                    let mut de = make_de(" text <![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4597                    // Text is trimmed from the start
4598                    assert_eq!(
4599                        de.next().unwrap(),
4600                        DeEvent::Text("text  cdata  cdata2 ".into())
4601                    );
4602                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4603                }
4604
4605                #[test]
4606                fn eof() {
4607                    let mut de = make_de(" text <![CDATA[ cdata ]]>");
4608                    // Text is trimmed from the start
4609                    assert_eq!(de.next().unwrap(), DeEvent::Text("text  cdata ".into()));
4610                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4611                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4612                }
4613            }
4614        }
4615
4616        mod cdata {
4617            use super::*;
4618            use pretty_assertions::assert_eq;
4619
4620            mod start {
4621                use super::*;
4622                use pretty_assertions::assert_eq;
4623
4624                #[test]
4625                fn start() {
4626                    let mut de = make_de("<![CDATA[ cdata ]]><tag1><tag2>");
4627                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4628                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4629                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4630                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4631                }
4632
4633                /// Not matching end tag will result in error
4634                #[test]
4635                fn end() {
4636                    let mut de = make_de("<![CDATA[ cdata ]]><tag></tag>");
4637                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4638                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4639                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4640                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4641                }
4642
4643                #[test]
4644                fn text() {
4645                    let mut de = make_de("<![CDATA[ cdata ]]><tag> text ");
4646                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4647                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4648                    // Text is trimmed from both sides
4649                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4650                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4651                }
4652
4653                #[test]
4654                fn cdata() {
4655                    let mut de = make_de("<![CDATA[ cdata ]]><tag><![CDATA[ cdata2 ]]>");
4656                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4657                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4658                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata2 ".into()));
4659                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4660                }
4661
4662                #[test]
4663                fn eof() {
4664                    let mut de = make_de("<![CDATA[ cdata ]]><tag>");
4665                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4666                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4667                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4668                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4669                }
4670            }
4671
4672            /// End event without corresponding start event will always generate an error
4673            #[test]
4674            fn end() {
4675                let mut de = make_de("<![CDATA[ cdata ]]></tag>");
4676                assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4677                match de.next() {
4678                    Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4679                        assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4680                    }
4681                    x => panic!(
4682                        "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4683                        x
4684                    ),
4685                }
4686                assert_eq!(de.next().unwrap(), DeEvent::Eof);
4687            }
4688
4689            mod text {
4690                use super::*;
4691                use pretty_assertions::assert_eq;
4692
4693                #[test]
4694                fn start() {
4695                    let mut de = make_de("<![CDATA[ cdata ]]> text <tag>");
4696                    // Text is trimmed from the end
4697                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  text".into()));
4698                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4699                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4700                }
4701
4702                #[test]
4703                fn end() {
4704                    let mut de = make_de("<![CDATA[ cdata ]]> text </tag>");
4705                    // Text is trimmed from the end
4706                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  text".into()));
4707                    match de.next() {
4708                        Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4709                            assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4710                        }
4711                        x => panic!(
4712                            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4713                            x
4714                        ),
4715                    }
4716                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4717                }
4718
4719                // cdata::text::text is equivalent to cdata::text
4720
4721                #[test]
4722                fn cdata() {
4723                    let mut de = make_de("<![CDATA[ cdata ]]> text <![CDATA[ cdata2 ]]>");
4724                    assert_eq!(
4725                        de.next().unwrap(),
4726                        DeEvent::Text(" cdata  text  cdata2 ".into())
4727                    );
4728                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4729                }
4730
4731                #[test]
4732                fn eof() {
4733                    let mut de = make_de("<![CDATA[ cdata ]]> text ");
4734                    // Text is trimmed from the end
4735                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  text".into()));
4736                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4737                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4738                }
4739            }
4740
4741            mod cdata {
4742                use super::*;
4743                use pretty_assertions::assert_eq;
4744
4745                #[test]
4746                fn start() {
4747                    let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><tag>");
4748                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  cdata2 ".into()));
4749                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4750                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4751                }
4752
4753                #[test]
4754                fn end() {
4755                    let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]></tag>");
4756                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  cdata2 ".into()));
4757                    match de.next() {
4758                        Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4759                            assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4760                        }
4761                        x => panic!(
4762                            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4763                            x
4764                        ),
4765                    }
4766                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4767                }
4768
4769                #[test]
4770                fn text() {
4771                    let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]> text ");
4772                    // Text is trimmed from the end
4773                    assert_eq!(
4774                        de.next().unwrap(),
4775                        DeEvent::Text(" cdata  cdata2  text".into())
4776                    );
4777                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4778                }
4779
4780                #[test]
4781                fn cdata() {
4782                    let mut de =
4783                        make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><![CDATA[ cdata3 ]]>");
4784                    assert_eq!(
4785                        de.next().unwrap(),
4786                        DeEvent::Text(" cdata  cdata2  cdata3 ".into())
4787                    );
4788                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4789                }
4790
4791                #[test]
4792                fn eof() {
4793                    let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4794                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  cdata2 ".into()));
4795                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4796                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4797                }
4798            }
4799        }
4800    }
4801}