quick_xml/de/mod.rs
1//! Serde `Deserializer` module.
2//!
3//! Due to the complexity of the XML standard and the fact that Serde was developed
4//! with JSON in mind, not all Serde concepts apply smoothly to XML. This leads to
5//! the fact that some XML concepts are inexpressible in terms of Serde derives
6//! and may require manual deserialization.
7//!
8//! The most notable restriction is the ability to distinguish between _elements_
9//! and _attributes_, as no other format used by serde has such a concept.
10//!
11//! Due to that the mapping is performed in a best effort manner.
12//!
13//!
14//!
15//! Table of Contents
16//! =================
17//! - [Mapping XML to Rust types](#mapping-xml-to-rust-types)
18//! - [Basics](#basics)
19//! - [Optional attributes and elements](#optional-attributes-and-elements)
20//! - [Choices (`xs:choice` XML Schema type)](#choices-xschoice-xml-schema-type)
21//! - [Sequences (`xs:all` and `xs:sequence` XML Schema types)](#sequences-xsall-and-xssequence-xml-schema-types)
22//! - [Generate Rust types from XML](#generate-rust-types-from-xml)
23//! - [Composition Rules](#composition-rules)
24//! - [Enum Representations](#enum-representations)
25//! - [Normal enum variant](#normal-enum-variant)
26//! - [`$text` enum variant](#text-enum-variant)
27//! - [`$text` and `$value` special names](#text-and-value-special-names)
28//! - [`$text`](#text)
29//! - [`$value`](#value)
30//! - [Primitives and sequences of primitives](#primitives-and-sequences-of-primitives)
31//! - [Structs and sequences of structs](#structs-and-sequences-of-structs)
32//! - [Enums and sequences of enums](#enums-and-sequences-of-enums)
33//! - [Frequently Used Patterns](#frequently-used-patterns)
34//! - [`<element>` lists](#element-lists)
35//! - [Overlapped (Out-of-Order) Elements](#overlapped-out-of-order-elements)
36//! - [Internally Tagged Enums](#internally-tagged-enums)
37//!
38//!
39//!
40//! Mapping XML to Rust types
41//! =========================
42//!
43//! Type names are never considered when deserializing, so you can name your
44//! types as you wish. Other general rules:
45//! - `struct` field name could be represented in XML only as an attribute name
46//! or an element name;
47//! - `enum` variant name could be represented in XML only as an attribute name
48//! or an element name;
49//! - the unit struct, unit type `()` and unit enum variant can be deserialized
50//! from any valid XML content:
51//! - attribute and element names;
52//! - attribute and element values;
53//! - text or CDATA content (including mixed text and CDATA content).
54//!
55//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
56//!
57//! NOTE: All tests are marked with an `ignore` option, even though they do
58//! compile. This is because rustdoc marks such blocks with an information
59//! icon unlike `no_run` blocks.
60//!
61//! </div>
62//!
63//! <table>
64//! <thead>
65//! <tr><th colspan="2">
66//!
67//! ## Basics
68//!
69//! </th></tr>
70//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
71//! </thead>
72//! <tbody style="vertical-align:top;">
73//! <tr>
74//! <td>
75//! Content of attributes and text / CDATA content of elements (including mixed
76//! text and CDATA content):
77//!
78//! ```xml
79//! <... ...="content" />
80//! ```
81//! ```xml
82//! <...>content</...>
83//! ```
84//! ```xml
85//! <...><![CDATA[content]]></...>
86//! ```
87//! ```xml
88//! <...>text<![CDATA[cdata]]>text</...>
89//! ```
90//! Mixed text / CDATA content represents one logical string, `"textcdatatext"` in that case.
91//! </td>
92//! <td>
93//!
94//! You can use any type that can be deserialized from an `&str`, for example:
95//! - [`String`] and [`&str`]
96//! - [`Cow<str>`]
97//! - [`u32`], [`f32`] and other numeric types
98//! - `enum`s, like
99//! ```
100//! # use pretty_assertions::assert_eq;
101//! # use serde::Deserialize;
102//! # #[derive(Debug, PartialEq)]
103//! #[derive(Deserialize)]
104//! enum Language {
105//! Rust,
106//! Cpp,
107//! #[serde(other)]
108//! Other,
109//! }
110//! # #[derive(Debug, PartialEq, Deserialize)]
111//! # struct X { #[serde(rename = "$text")] x: Language }
112//! # assert_eq!(X { x: Language::Rust }, quick_xml::de::from_str("<x>Rust</x>").unwrap());
113//! # assert_eq!(X { x: Language::Cpp }, quick_xml::de::from_str("<x>C<![CDATA[p]]>p</x>").unwrap());
114//! # assert_eq!(X { x: Language::Other }, quick_xml::de::from_str("<x><![CDATA[other]]></x>").unwrap());
115//! ```
116//!
117//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
118//!
119//! NOTE: deserialization to non-owned types (i.e. borrow from the input),
120//! such as `&str`, is possible only if you parse document in the UTF-8
121//! encoding and content does not contain entity references such as `&amp;`,
122//! or character references such as `&#xD;`, as well as text content represented
123//! by one piece of [text] or [CDATA] element.
124//! </div>
125//! <!-- TODO: document an error type returned -->
126//!
127//! [text]: Event::Text
128//! [CDATA]: Event::CData
129//! </td>
130//! </tr>
131//! <!-- 2 ===================================================================================== -->
132//! <tr>
133//! <td>
134//!
135//! Content of attributes and text / CDATA content of elements (including mixed
136//! text and CDATA content), which represents a space-delimited list, as
137//! specified in the XML Schema specification for [`xs:list`] `simpleType`:
138//!
139//! ```xml
140//! <... ...="element1 element2 ..." />
141//! ```
142//! ```xml
143//! <...>
144//! element1
145//! element2
146//! ...
147//! </...>
148//! ```
149//! ```xml
150//! <...><![CDATA[
151//! element1
152//! element2
153//! ...
154//! ]]></...>
155//! ```
156//!
157//! [`xs:list`]: https://www.w3.org/TR/xmlschema11-2/#list-datatypes
158//! </td>
159//! <td>
160//!
161//! Use any type that is deserialized using a [`deserialize_seq()`] call, for example:
162//!
163//! ```
164//! type List = Vec<u32>;
165//! ```
166//!
167//! See the next row to learn where in your struct definition you should
168//! use that type.
169//!
170//! According to the XML Schema specification, the delimiter between elements is one
171//! or more space (`' '`, `'\r'`, `'\n'`, and `'\t'`) characters.
172//!
173//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
174//!
175//! NOTE: according to the XML Schema restrictions, you cannot escape those
176//! white-space characters, so list elements will _never_ contain them.
177//! In practice you will usually use `xs:list`s for lists of numbers or enumerated
178//! values which look like identifiers in many languages, for example, `item`,
179//! `some_item` or `some-item`, so that shouldn't be a problem.
180//!
181//! NOTE: according to the XML Schema specification, list elements can be
182//! delimited only by spaces. Other delimiters (for example, commas) are not
183//! allowed.
184//!
185//! </div>
186//!
187//! [`deserialize_seq()`]: de::Deserializer::deserialize_seq
188//! </td>
189//! </tr>
190//! <!-- 3 ===================================================================================== -->
191//! <tr>
192//! <td>
193//! A typical XML with attributes. The root tag name does not matter:
194//!
195//! ```xml
196//! <any-tag one="..." two="..."/>
197//! ```
198//! </td>
199//! <td>
200//!
201//! A structure where each XML attribute is mapped to a field with a name
202//! starting with `@`. Because Rust identifiers do not permit the `@` character,
203//! you should use the `#[serde(rename = "@...")]` attribute to rename it.
204//! The name of the struct itself does not matter:
205//!
206//! ```
207//! # use serde::Deserialize;
208//! # type T = ();
209//! # type U = ();
210//! // Get both attributes
211//! # #[derive(Debug, PartialEq)]
212//! #[derive(Deserialize)]
213//! struct AnyName {
214//! #[serde(rename = "@one")]
215//! one: T,
216//!
217//! #[serde(rename = "@two")]
218//! two: U,
219//! }
220//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
221//! ```
222//! ```
223//! # use serde::Deserialize;
224//! # type T = ();
225//! // Get only the one attribute, ignore the other
226//! # #[derive(Debug, PartialEq)]
227//! #[derive(Deserialize)]
228//! struct AnyName {
229//! #[serde(rename = "@one")]
230//! one: T,
231//! }
232//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
233//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."/>"#).unwrap();
234//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
235//! ```
236//! ```
237//! # use serde::Deserialize;
238//! // Ignore all attributes
239//! // You can also use the `()` type (unit type)
240//! # #[derive(Debug, PartialEq)]
241//! #[derive(Deserialize)]
242//! struct AnyName;
243//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
244//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
245//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
246//! ```
247//!
248//! All these structs can be used to deserialize from an XML on the
249//! left side depending on the amount of information that you want to get.
250//! Of course, you can combine them with elements extractor structs (see below).
251//!
252//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
253//!
254//! NOTE: XML allows you to have an attribute and an element with the same name
255//! inside the one element. quick-xml deals with that by prepending a `@` prefix
256//! to the name of attributes.
257//! </div>
258//! </td>
259//! </tr>
260//! <!-- 4 ===================================================================================== -->
261//! <tr>
262//! <td>
263//! A typical XML with child elements. The root tag name does not matter:
264//!
265//! ```xml
266//! <any-tag>
267//! <one>...</one>
268//! <two>...</two>
269//! </any-tag>
270//! ```
271//! </td>
272//! <td>
273//! A structure where each XML child element is mapped to the field.
274//! Each element name becomes a name of field. The name of the struct itself
275//! does not matter:
276//!
277//! ```
278//! # use serde::Deserialize;
279//! # type T = ();
280//! # type U = ();
281//! // Get both elements
282//! # #[derive(Debug, PartialEq)]
283//! #[derive(Deserialize)]
284//! struct AnyName {
285//! one: T,
286//! two: U,
287//! }
288//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
289//! #
290//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap_err();
291//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"#).unwrap_err();
292//! ```
293//! ```
294//! # use serde::Deserialize;
295//! # type T = ();
296//! // Get only the one element, ignore the other
297//! # #[derive(Debug, PartialEq)]
298//! #[derive(Deserialize)]
299//! struct AnyName {
300//! one: T,
301//! }
302//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
303//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
304//! ```
305//! ```
306//! # use serde::Deserialize;
307//! // Ignore all elements
308//! // You can also use the `()` type (unit type)
309//! # #[derive(Debug, PartialEq)]
310//! #[derive(Deserialize)]
311//! struct AnyName;
312//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
313//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
314//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"#).unwrap();
315//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
316//! ```
317//!
318//! All these structs can be used to deserialize from an XML on the
319//! left side depending on the amount of information that you want to get.
320//! Of course, you can combine them with attributes extractor structs (see above).
321//!
322//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
323//!
324//! NOTE: XML allows you to have an attribute and an element with the same name
325//! inside the one element. quick-xml deals with that by prepending a `@` prefix
326//! to the name of attributes.
327//! </div>
328//! </td>
329//! </tr>
330//! <!-- 5 ===================================================================================== -->
331//! <tr>
332//! <td>
333//! An XML with an attribute and a child element named equally:
334//!
335//! ```xml
336//! <any-tag field="...">
337//! <field>...</field>
338//! </any-tag>
339//! ```
340//! </td>
341//! <td>
342//!
343//! You MUST specify `#[serde(rename = "@field")]` on a field that will be used
344//! for an attribute:
345//!
346//! ```
347//! # use pretty_assertions::assert_eq;
348//! # use serde::Deserialize;
349//! # type T = ();
350//! # type U = ();
351//! # #[derive(Debug, PartialEq)]
352//! #[derive(Deserialize)]
353//! struct AnyName {
354//! #[serde(rename = "@field")]
355//! attribute: T,
356//! field: U,
357//! }
358//! # assert_eq!(
359//! # AnyName { attribute: (), field: () },
360//! # quick_xml::de::from_str(r#"
361//! # <any-tag field="...">
362//! # <field>...</field>
363//! # </any-tag>
364//! # "#).unwrap(),
365//! # );
366//! ```
367//! </td>
368//! </tr>
369//! <!-- ======================================================================================= -->
370//! <tr><th colspan="2">
371//!
372//! ## Optional attributes and elements
373//!
374//! </th></tr>
375//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
376//! <!-- 6 ===================================================================================== -->
377//! <tr>
378//! <td>
379//! An optional XML attribute that you want to capture.
380//! The root tag name does not matter:
381//!
382//! ```xml
383//! <any-tag optional="..."/>
384//! ```
385//! ```xml
386//! <any-tag/>
387//! ```
388//! </td>
389//! <td>
390//!
391//! A structure with an optional field, renamed according to the requirements
392//! for attributes:
393//!
394//! ```
395//! # use pretty_assertions::assert_eq;
396//! # use serde::Deserialize;
397//! # type T = ();
398//! # #[derive(Debug, PartialEq)]
399//! #[derive(Deserialize)]
400//! struct AnyName {
401//! #[serde(rename = "@optional")]
402//! optional: Option<T>,
403//! }
404//! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag optional="..."/>"#).unwrap());
405//! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#"<any-tag/>"#).unwrap());
406//! ```
407//! When the XML attribute is present, type `T` will be deserialized from
408//! an attribute value (which is a string). Note, that if `T = String` or other
409//! string type, the empty attribute is mapped to a `Some("")`, whereas `None`
410//! represents the missing attribute:
411//! ```xml
412//! <any-tag optional="..."/><!-- Some("...") -->
413//! <any-tag optional=""/> <!-- Some("") -->
414//! <any-tag/> <!-- None -->
415//! ```
416//! </td>
417//! </tr>
418//! <!-- 7 ===================================================================================== -->
419//! <tr>
420//! <td>
421//! An optional XML element that you want to capture.
422//! The root tag name does not matter:
423//!
424//! ```xml
425//! <any-tag>
426//! <optional>...</optional>
427//! </any-tag>
428//! ```
429//! ```xml
430//! <any-tag>
431//! <optional/>
432//! </any-tag>
433//! ```
434//! ```xml
435//! <any-tag/>
436//! ```
437//! </td>
438//! <td>
439//!
440//! A structure with an optional field:
441//!
442//! ```
443//! # use pretty_assertions::assert_eq;
444//! # use serde::Deserialize;
445//! # type T = ();
446//! # #[derive(Debug, PartialEq)]
447//! #[derive(Deserialize)]
448//! struct AnyName {
449//! optional: Option<T>,
450//! }
451//! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag><optional>...</optional></any-tag>"#).unwrap());
452//! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#"<any-tag/>"#).unwrap());
453//! ```
454//! When the XML element is present, type `T` will be deserialized from an
455//! element (which is a string or a multi-mapping -- i.e. mapping which can have
456//! duplicated keys).
457//! <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
458//!
459//! Currently some edge cases exist, which are described in the issue [#497].
460//! </div>
461//! </td>
462//! </tr>
463//! <!-- ======================================================================================= -->
464//! <tr><th colspan="2">
465//!
466//! ## Choices (`xs:choice` XML Schema type)
467//!
468//! </th></tr>
469//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
470//! <!-- 8 ===================================================================================== -->
471//! <tr>
472//! <td>
473//! An XML with different root tag names, as well as text / CDATA content:
474//!
475//! ```xml
476//! <one field1="...">...</one>
477//! ```
478//! ```xml
479//! <two>
480//! <field2>...</field2>
481//! </two>
482//! ```
483//! ```xml
484//! Text <![CDATA[or (mixed)
485//! CDATA]]> content
486//! ```
487//! </td>
488//! <td>
489//!
490//! An enum where each variant has the name of a possible root tag. The name of
491//! the enum itself does not matter.
492//!
493//! If you need to get the textual content, mark a variant with `#[serde(rename = "$text")]`.
494//!
495//! All these structs can be used to deserialize from any XML on the
496//! left side depending on the amount of information that you want to get:
497//!
498//! ```
499//! # use pretty_assertions::assert_eq;
500//! # use serde::Deserialize;
501//! # type T = ();
502//! # type U = ();
503//! # #[derive(Debug, PartialEq)]
504//! #[derive(Deserialize)]
505//! #[serde(rename_all = "snake_case")]
506//! enum AnyName {
507//! One { #[serde(rename = "@field1")] field1: T },
508//! Two { field2: U },
509//!
510//! /// Use unit variant, if you do not care about the content.
511//! /// You can use tuple variant if you want to parse
512//! /// textual content as an xs:list.
513//! /// Struct variants will pass a string to the
514//! /// struct enum variant visitor, which typically
515//! /// returns Err(Custom)
516//! #[serde(rename = "$text")]
517//! Text(String),
518//! }
519//! # assert_eq!(AnyName::One { field1: () }, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
520//! # assert_eq!(AnyName::Two { field2: () }, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
521//! # assert_eq!(AnyName::Text("text cdata ".into()), quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
522//! ```
523//! ```
524//! # use pretty_assertions::assert_eq;
525//! # use serde::Deserialize;
526//! # type T = ();
527//! # #[derive(Debug, PartialEq)]
528//! #[derive(Deserialize)]
529//! struct Two {
530//! field2: T,
531//! }
532//! # #[derive(Debug, PartialEq)]
533//! #[derive(Deserialize)]
534//! #[serde(rename_all = "snake_case")]
535//! enum AnyName {
536//! // `field1` content discarded
537//! One,
538//! Two(Two),
539//! #[serde(rename = "$text")]
540//! Text,
541//! }
542//! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
543//! # assert_eq!(AnyName::Two(Two { field2: () }), quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
544//! # assert_eq!(AnyName::Text, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
545//! ```
546//! ```
547//! # use pretty_assertions::assert_eq;
548//! # use serde::Deserialize;
549//! # #[derive(Debug, PartialEq)]
550//! #[derive(Deserialize)]
551//! #[serde(rename_all = "snake_case")]
552//! enum AnyName {
553//! One,
554//! // the <two> and textual content will be mapped to this
555//! #[serde(other)]
556//! Other,
557//! }
558//! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
559//! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
560//! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
561//! ```
562//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
563//!
564//! NOTE: You should have variants for all possible tag names in your enum
565//! or have an `#[serde(other)]` variant.
566//! <!-- TODO: document an error type if that requirement is violated -->
567//! </div>
568//! </td>
569//! </tr>
570//! <!-- 9 ===================================================================================== -->
571//! <tr>
572//! <td>
573//!
574//! `<xs:choice>` embedded in the other element, and at the same time you want
575//! to get access to other attributes that can appear in the same container
576//! (`<any-tag>`). Also this case can be described, as if you want to choose
577//! Rust enum variant based on a tag name:
578//!
579//! ```xml
580//! <any-tag field="...">
581//! <one>...</one>
582//! </any-tag>
583//! ```
584//! ```xml
585//! <any-tag field="...">
586//! <two>...</two>
587//! </any-tag>
588//! ```
589//! ```xml
590//! <any-tag field="...">
591//! Text <![CDATA[or (mixed)
592//! CDATA]]> content
593//! </any-tag>
594//! ```
595//! </td>
596//! <td>
597//!
598//! A structure with a field which type is an `enum`.
599//!
600//! If you need to get a textual content, mark a variant with `#[serde(rename = "$text")]`.
601//!
602//! Names of the enum, struct, and struct field with `Choice` type do not matter:
603//!
604//! ```
605//! # use pretty_assertions::assert_eq;
606//! # use serde::Deserialize;
607//! # type T = ();
608//! # #[derive(Debug, PartialEq)]
609//! #[derive(Deserialize)]
610//! #[serde(rename_all = "snake_case")]
611//! enum Choice {
612//! One,
613//! Two,
614//!
615//! /// Use unit variant, if you do not care about the content.
616//! /// You can use tuple variant if you want to parse
617//! /// textual content as an xs:list.
618//! /// Struct variants will pass a string to the
619//! /// struct enum variant visitor, which typically
620//! /// returns Err(Custom)
621//! #[serde(rename = "$text")]
622//! Text(String),
623//! }
624//! # #[derive(Debug, PartialEq)]
625//! #[derive(Deserialize)]
626//! struct AnyName {
627//! #[serde(rename = "@field")]
628//! field: T,
629//!
630//! #[serde(rename = "$value")]
631//! any_name: Choice,
632//! }
633//! # assert_eq!(
634//! # AnyName { field: (), any_name: Choice::One },
635//! # quick_xml::de::from_str(r#"<any-tag field="..."><one>...</one></any-tag>"#).unwrap(),
636//! # );
637//! # assert_eq!(
638//! # AnyName { field: (), any_name: Choice::Two },
639//! # quick_xml::de::from_str(r#"<any-tag field="..."><two>...</two></any-tag>"#).unwrap(),
640//! # );
641//! # assert_eq!(
642//! # AnyName { field: (), any_name: Choice::Text("text cdata ".into()) },
643//! # quick_xml::de::from_str(r#"<any-tag field="...">text <![CDATA[ cdata ]]></any-tag>"#).unwrap(),
644//! # );
645//! ```
646//! </td>
647//! </tr>
648//! <!-- 10 ==================================================================================== -->
649//! <tr>
650//! <td>
651//!
652//! `<xs:choice>` embedded in the other element, and at the same time you want
653//! to get access to other elements that can appear in the same container
654//! (`<any-tag>`). Also this case can be described, as if you want to choose
655//! Rust enum variant based on a tag name:
656//!
657//! ```xml
658//! <any-tag>
659//! <field>...</field>
660//! <one>...</one>
661//! </any-tag>
662//! ```
663//! ```xml
664//! <any-tag>
665//! <two>...</two>
666//! <field>...</field>
667//! </any-tag>
668//! ```
669//! </td>
670//! <td>
671//!
672//! A structure with a field which type is an `enum`.
673//!
674//! Names of the enum, struct, and struct field with `Choice` type do not matter:
675//!
676//! ```
677//! # use pretty_assertions::assert_eq;
678//! # use serde::Deserialize;
679//! # type T = ();
680//! # #[derive(Debug, PartialEq)]
681//! #[derive(Deserialize)]
682//! #[serde(rename_all = "snake_case")]
683//! enum Choice {
684//! One,
685//! Two,
686//! }
687//! # #[derive(Debug, PartialEq)]
688//! #[derive(Deserialize)]
689//! struct AnyName {
690//! field: T,
691//!
692//! #[serde(rename = "$value")]
693//! any_name: Choice,
694//! }
695//! # assert_eq!(
696//! # AnyName { field: (), any_name: Choice::One },
697//! # quick_xml::de::from_str(r#"<any-tag><field>...</field><one>...</one></any-tag>"#).unwrap(),
698//! # );
699//! # assert_eq!(
700//! # AnyName { field: (), any_name: Choice::Two },
701//! # quick_xml::de::from_str(r#"<any-tag><two>...</two><field>...</field></any-tag>"#).unwrap(),
702//! # );
703//! ```
704//!
705//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
706//!
707//! NOTE: if your `Choice` enum would contain an `#[serde(other)]`
708//! variant, element `<field>` will be mapped to the `field` and not to the enum
709//! variant.
710//! </div>
711//!
712//! </td>
713//! </tr>
714//! <!-- 11 ==================================================================================== -->
715//! <tr>
716//! <td>
717//!
718//! `<xs:choice>` encapsulated in other element with a fixed name:
719//!
720//! ```xml
721//! <any-tag field="...">
722//! <choice>
723//! <one>...</one>
724//! </choice>
725//! </any-tag>
726//! ```
727//! ```xml
728//! <any-tag field="...">
729//! <choice>
730//! <two>...</two>
731//! </choice>
732//! </any-tag>
733//! ```
734//! </td>
735//! <td>
736//!
737//! A structure with a field of an intermediate type with one field of `enum` type.
738//! Actually, this example is not necessary, because you can construct it by yourself
739//! using the composition rules that were described above. However the XML construction
740//! described here is very common, so it is shown explicitly.
741//!
742//! Names of the enum and struct do not matter:
743//!
744//! ```
745//! # use pretty_assertions::assert_eq;
746//! # use serde::Deserialize;
747//! # type T = ();
748//! # #[derive(Debug, PartialEq)]
749//! #[derive(Deserialize)]
750//! #[serde(rename_all = "snake_case")]
751//! enum Choice {
752//! One,
753//! Two,
754//! }
755//! # #[derive(Debug, PartialEq)]
756//! #[derive(Deserialize)]
757//! struct Holder {
758//! #[serde(rename = "$value")]
759//! any_name: Choice,
760//! }
761//! # #[derive(Debug, PartialEq)]
762//! #[derive(Deserialize)]
763//! struct AnyName {
764//! #[serde(rename = "@field")]
765//! field: T,
766//!
767//! choice: Holder,
768//! }
769//! # assert_eq!(
770//! # AnyName { field: (), choice: Holder { any_name: Choice::One } },
771//! # quick_xml::de::from_str(r#"<any-tag field="..."><choice><one>...</one></choice></any-tag>"#).unwrap(),
772//! # );
773//! # assert_eq!(
774//! # AnyName { field: (), choice: Holder { any_name: Choice::Two } },
775//! # quick_xml::de::from_str(r#"<any-tag field="..."><choice><two>...</two></choice></any-tag>"#).unwrap(),
776//! # );
777//! ```
778//! </td>
779//! </tr>
780//! <!-- 12 ==================================================================================== -->
781//! <tr>
782//! <td>
783//!
784//! `<xs:choice>` encapsulated in other element with a fixed name:
785//!
786//! ```xml
787//! <any-tag>
788//! <field>...</field>
789//! <choice>
790//! <one>...</one>
791//! </choice>
792//! </any-tag>
793//! ```
794//! ```xml
795//! <any-tag>
796//! <choice>
797//! <two>...</two>
798//! </choice>
799//! <field>...</field>
800//! </any-tag>
801//! ```
802//! </td>
803//! <td>
804//!
805//! A structure with a field of an intermediate type with one field of `enum` type.
806//! Actually, this example is not necessary, because you can construct it by yourself
807//! using the composition rules that were described above. However the XML construction
808//! described here is very common, so it is shown explicitly.
809//!
810//! Names of the enum and struct do not matter:
811//!
812//! ```
813//! # use pretty_assertions::assert_eq;
814//! # use serde::Deserialize;
815//! # type T = ();
816//! # #[derive(Debug, PartialEq)]
817//! #[derive(Deserialize)]
818//! #[serde(rename_all = "snake_case")]
819//! enum Choice {
820//! One,
821//! Two,
822//! }
823//! # #[derive(Debug, PartialEq)]
824//! #[derive(Deserialize)]
825//! struct Holder {
826//! #[serde(rename = "$value")]
827//! any_name: Choice,
828//! }
829//! # #[derive(Debug, PartialEq)]
830//! #[derive(Deserialize)]
831//! struct AnyName {
832//! field: T,
833//!
834//! choice: Holder,
835//! }
836//! # assert_eq!(
837//! # AnyName { field: (), choice: Holder { any_name: Choice::One } },
838//! # quick_xml::de::from_str(r#"<any-tag><field>...</field><choice><one>...</one></choice></any-tag>"#).unwrap(),
839//! # );
840//! # assert_eq!(
841//! # AnyName { field: (), choice: Holder { any_name: Choice::Two } },
842//! # quick_xml::de::from_str(r#"<any-tag><choice><two>...</two></choice><field>...</field></any-tag>"#).unwrap(),
843//! # );
844//! ```
845//! </td>
846//! </tr>
847//! <!-- ======================================================================================== -->
848//! <tr><th colspan="2">
849//!
850//! ## Sequences (`xs:all` and `xs:sequence` XML Schema types)
851//!
852//! </th></tr>
853//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
854//! <!-- 13 ==================================================================================== -->
855//! <tr>
856//! <td>
857//! A sequence inside of a tag without a dedicated name:
858//!
859//! ```xml
860//! <any-tag/>
861//! ```
862//! ```xml
863//! <any-tag>
864//! <item/>
865//! </any-tag>
866//! ```
867//! ```xml
868//! <any-tag>
869//! <item/>
870//! <item/>
871//! <item/>
872//! </any-tag>
873//! ```
874//! </td>
875//! <td>
876//!
877//! A structure with a field which is a sequence type, for example, [`Vec`].
878//! Because XML syntax does not distinguish between empty sequences and missing
879//! elements, we should indicate that on the Rust side, because serde will require
880//! that field `item` exists. You can do that in two possible ways:
881//!
882//! Use the `#[serde(default)]` attribute for a [field] or the entire [struct]:
883//! ```
884//! # use pretty_assertions::assert_eq;
885//! # use serde::Deserialize;
886//! # type Item = ();
887//! # #[derive(Debug, PartialEq)]
888//! #[derive(Deserialize)]
889//! struct AnyName {
890//! #[serde(default)]
891//! item: Vec<Item>,
892//! }
893//! # assert_eq!(
894//! # AnyName { item: vec![] },
895//! # quick_xml::de::from_str(r#"<any-tag/>"#).unwrap(),
896//! # );
897//! # assert_eq!(
898//! # AnyName { item: vec![()] },
899//! # quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"#).unwrap(),
900//! # );
901//! # assert_eq!(
902//! # AnyName { item: vec![(), (), ()] },
903//! # quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"#).unwrap(),
904//! # );
905//! ```
906//!
907//! Use the [`Option`]. In that case the inner array will always contain at least one
908//! element after deserialization:
909//! ```ignore
910//! # use pretty_assertions::assert_eq;
911//! # use serde::Deserialize;
912//! # type Item = ();
913//! # #[derive(Debug, PartialEq)]
914//! #[derive(Deserialize)]
915//! struct AnyName {
916//! item: Option<Vec<Item>>,
917//! }
918//! # assert_eq!(
919//! # AnyName { item: None },
920//! # quick_xml::de::from_str(r#"<any-tag/>"#).unwrap(),
921//! # );
922//! # assert_eq!(
923//! # AnyName { item: Some(vec![()]) },
924//! # quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"#).unwrap(),
925//! # );
926//! # assert_eq!(
927//! # AnyName { item: Some(vec![(), (), ()]) },
928//! # quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"#).unwrap(),
929//! # );
930//! ```
931//!
932//! See also [Frequently Used Patterns](#element-lists).
933//!
934//! [field]: https://serde.rs/field-attrs.html#default
935//! [struct]: https://serde.rs/container-attrs.html#default
936//! </td>
937//! </tr>
938//! <!-- 14 ==================================================================================== -->
939//! <tr>
940//! <td>
941//! A sequence with a strict order, probably with mixed content
942//! (text / CDATA and tags):
943//!
944//! ```xml
945//! <one>...</one>
946//! text
947//! <![CDATA[cdata]]>
948//! <two>...</two>
949//! <one>...</one>
950//! ```
951//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
952//!
953//! NOTE: this is just an example for showing mapping. XML does not allow
954//! multiple root tags -- you should wrap the sequence into a tag.
955//! </div>
956//! </td>
957//! <td>
958//!
959//! All elements are mapped to a heterogeneous sequential type: tuple or named tuple.
960//! Each element of the tuple should be deserializable from the nested
961//! element content (`...`), except for enum types, which are deserialized
962//! from the full element (`<one>...</one>`) so that they can use the element name
963//! to choose the right variant:
964//!
965//! ```
966//! # use pretty_assertions::assert_eq;
967//! # use serde::Deserialize;
968//! # type One = ();
969//! # type Two = ();
970//! # /*
971//! type One = ...;
972//! type Two = ...;
973//! # */
974//! # #[derive(Debug, PartialEq)]
975//! #[derive(Deserialize)]
976//! struct AnyName(One, String, Two, One);
977//! # assert_eq!(
978//! # AnyName((), "text cdata".into(), (), ()),
979//! # quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
980//! # );
981//! ```
982//! ```
983//! # use pretty_assertions::assert_eq;
984//! # use serde::Deserialize;
985//! # #[derive(Debug, PartialEq)]
986//! #[derive(Deserialize)]
987//! #[serde(rename_all = "snake_case")]
988//! enum Choice {
989//! One,
990//! }
991//! # type Two = ();
992//! # /*
993//! type Two = ...;
994//! # */
995//! type AnyName = (Choice, String, Two, Choice);
996//! # assert_eq!(
997//! # (Choice::One, "text cdata".to_string(), (), Choice::One),
998//! # quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
999//! # );
1000//! ```
1001//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1002//!
1003//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1004//! so you cannot have two adjacent string types in your sequence.
1005//!
1006//! NOTE: In the case that the list might contain tags that are overlapped with
1007//! tags that do not correspond to the list you should add the feature [`overlapped-lists`].
1008//! </div>
1009//! </td>
1010//! </tr>
1011//! <!-- 15 ==================================================================================== -->
1012//! <tr>
1013//! <td>
1014//! A sequence with a non-strict order, probably with a mixed content
1015//! (text / CDATA and tags).
1016//!
1017//! ```xml
1018//! <one>...</one>
1019//! text
1020//! <![CDATA[cdata]]>
1021//! <two>...</two>
1022//! <one>...</one>
1023//! ```
1024//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1025//!
1026//! NOTE: this is just an example for showing mapping. XML does not allow
1027//! multiple root tags -- you should wrap the sequence into a tag.
1028//! </div>
1029//! </td>
1030//! <td>
1031//! A homogeneous sequence of elements with a fixed or dynamic size:
1032//!
1033//! ```
1034//! # use pretty_assertions::assert_eq;
1035//! # use serde::Deserialize;
1036//! # #[derive(Debug, PartialEq)]
1037//! #[derive(Deserialize)]
1038//! #[serde(rename_all = "snake_case")]
1039//! enum Choice {
1040//! One,
1041//! Two,
1042//! #[serde(other)]
1043//! Other,
1044//! }
1045//! type AnyName = [Choice; 4];
1046//! # assert_eq!(
1047//! # [Choice::One, Choice::Other, Choice::Two, Choice::One],
1048//! # quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
1049//! # );
1050//! ```
1051//! ```
1052//! # use pretty_assertions::assert_eq;
1053//! # use serde::Deserialize;
1054//! # #[derive(Debug, PartialEq)]
1055//! #[derive(Deserialize)]
1056//! #[serde(rename_all = "snake_case")]
1057//! enum Choice {
1058//! One,
1059//! Two,
1060//! #[serde(rename = "$text")]
1061//! Other(String),
1062//! }
1063//! type AnyName = Vec<Choice>;
1064//! # assert_eq!(
1065//! # vec![
1066//! # Choice::One,
1067//! # Choice::Other("text cdata".into()),
1068//! # Choice::Two,
1069//! # Choice::One,
1070//! # ],
1071//! # quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
1072//! # );
1073//! ```
1074//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1075//!
1076//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1077//! so you cannot have two adjacent string types in your sequence.
1078//! </div>
1079//! </td>
1080//! </tr>
1081//! <!-- 16 ==================================================================================== -->
1082//! <tr>
1083//! <td>
1084//! A sequence with a strict order, probably with a mixed content,
1085//! (text and tags) inside of the other element:
1086//!
1087//! ```xml
1088//! <any-tag attribute="...">
1089//! <one>...</one>
1090//! text
1091//! <![CDATA[cdata]]>
1092//! <two>...</two>
1093//! <one>...</one>
1094//! </any-tag>
1095//! ```
1096//! </td>
1097//! <td>
1098//!
1099//! A structure where all child elements are mapped to one field which has
1100//! a heterogeneous sequential type: tuple or named tuple. Each element of the
1101//! tuple should be deserializable from the full element (`<one>...</one>`).
1102//!
1103//! You MUST specify `#[serde(rename = "$value")]` on that field:
1104//!
1105//! ```
1106//! # use pretty_assertions::assert_eq;
1107//! # use serde::Deserialize;
1108//! # type One = ();
1109//! # type Two = ();
1110//! # /*
1111//! type One = ...;
1112//! type Two = ...;
1113//! # */
1114//!
1115//! # #[derive(Debug, PartialEq)]
1116//! #[derive(Deserialize)]
1117//! struct AnyName {
1118//! #[serde(rename = "@attribute")]
1119//! # attribute: (),
1120//! # /*
1121//! attribute: ...,
1122//! # */
1123//! // Not (yet?) supported by serde
1124//! // https://github.com/serde-rs/serde/issues/1905
1125//! // #[serde(flatten)]
1126//! #[serde(rename = "$value")]
1127//! any_name: (One, String, Two, One),
1128//! }
1129//! # assert_eq!(
1130//! # AnyName { attribute: (), any_name: ((), "text cdata".into(), (), ()) },
1131//! # quick_xml::de::from_str("\
1132//! # <any-tag attribute='...'>\
1133//! # <one>...</one>\
1134//! # text \
1135//! # <![CDATA[cdata]]>\
1136//! # <two>...</two>\
1137//! # <one>...</one>\
1138//! # </any-tag>"
1139//! # ).unwrap(),
1140//! # );
1141//! ```
1142//! ```
1143//! # use pretty_assertions::assert_eq;
1144//! # use serde::Deserialize;
1145//! # type One = ();
1146//! # type Two = ();
1147//! # /*
1148//! type One = ...;
1149//! type Two = ...;
1150//! # */
1151//!
1152//! # #[derive(Debug, PartialEq)]
1153//! #[derive(Deserialize)]
1154//! struct NamedTuple(One, String, Two, One);
1155//!
1156//! # #[derive(Debug, PartialEq)]
1157//! #[derive(Deserialize)]
1158//! struct AnyName {
1159//! #[serde(rename = "@attribute")]
1160//! # attribute: (),
1161//! # /*
1162//! attribute: ...,
1163//! # */
1164//! // Not (yet?) supported by serde
1165//! // https://github.com/serde-rs/serde/issues/1905
1166//! // #[serde(flatten)]
1167//! #[serde(rename = "$value")]
1168//! any_name: NamedTuple,
1169//! }
1170//! # assert_eq!(
1171//! # AnyName { attribute: (), any_name: NamedTuple((), "text cdata".into(), (), ()) },
1172//! # quick_xml::de::from_str("\
1173//! # <any-tag attribute='...'>\
1174//! # <one>...</one>\
1175//! # text \
1176//! # <![CDATA[cdata]]>\
1177//! # <two>...</two>\
1178//! # <one>...</one>\
1179//! # </any-tag>"
1180//! # ).unwrap(),
1181//! # );
1182//! ```
1183//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1184//!
1185//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1186//! so you cannot have two adjacent string types in your sequence.
1187//! </div>
1188//! </td>
1189//! </tr>
1190//! <!-- 17 ==================================================================================== -->
1191//! <tr>
1192//! <td>
1193//! A sequence with a non-strict order, probably with a mixed content
1194//! (text / CDATA and tags) inside of the other element:
1195//!
1196//! ```xml
1197//! <any-tag>
1198//! <one>...</one>
1199//! text
1200//! <![CDATA[cdata]]>
1201//! <two>...</two>
1202//! <one>...</one>
1203//! </any-tag>
1204//! ```
1205//! </td>
1206//! <td>
1207//!
1208//! A structure where all child elements are mapped to one field which has
1209//! a homogeneous sequential type: array-like container. A container type `T`
1210//! should be deserializable from the nested element content (`...`),
1211//! unless it is an enum type, which is deserialized from the full
1212//! element (`<one>...</one>`).
1213//!
1214//! You MUST specify `#[serde(rename = "$value")]` on that field:
1215//!
1216//! ```
1217//! # use pretty_assertions::assert_eq;
1218//! # use serde::Deserialize;
1219//! # #[derive(Debug, PartialEq)]
1220//! #[derive(Deserialize)]
1221//! #[serde(rename_all = "snake_case")]
1222//! enum Choice {
1223//! One,
1224//! Two,
1225//! #[serde(rename = "$text")]
1226//! Other(String),
1227//! }
1228//! # #[derive(Debug, PartialEq)]
1229//! #[derive(Deserialize)]
1230//! struct AnyName {
1231//! #[serde(rename = "@attribute")]
1232//! # attribute: (),
1233//! # /*
1234//! attribute: ...,
1235//! # */
1236//! // Not (yet?) supported by serde
1237//! // https://github.com/serde-rs/serde/issues/1905
1238//! // #[serde(flatten)]
1239//! #[serde(rename = "$value")]
1240//! any_name: [Choice; 4],
1241//! }
1242//! # assert_eq!(
1243//! # AnyName { attribute: (), any_name: [
1244//! # Choice::One,
1245//! # Choice::Other("text cdata".into()),
1246//! # Choice::Two,
1247//! # Choice::One,
1248//! # ] },
1249//! # quick_xml::de::from_str("\
1250//! # <any-tag attribute='...'>\
1251//! # <one>...</one>\
1252//! # text \
1253//! # <![CDATA[cdata]]>\
1254//! # <two>...</two>\
1255//! # <one>...</one>\
1256//! # </any-tag>"
1257//! # ).unwrap(),
1258//! # );
1259//! ```
1260//! ```
1261//! # use pretty_assertions::assert_eq;
1262//! # use serde::Deserialize;
1263//! # #[derive(Debug, PartialEq)]
1264//! #[derive(Deserialize)]
1265//! #[serde(rename_all = "snake_case")]
1266//! enum Choice {
1267//! One,
1268//! Two,
1269//! #[serde(rename = "$text")]
1270//! Other(String),
1271//! }
1272//! # #[derive(Debug, PartialEq)]
1273//! #[derive(Deserialize)]
1274//! struct AnyName {
1275//! #[serde(rename = "@attribute")]
1276//! # attribute: (),
1277//! # /*
1278//! attribute: ...,
1279//! # */
1280//! // Not (yet?) supported by serde
1281//! // https://github.com/serde-rs/serde/issues/1905
1282//! // #[serde(flatten)]
1283//! #[serde(rename = "$value")]
1284//! any_name: Vec<Choice>,
1285//! }
1286//! # assert_eq!(
1287//! # AnyName { attribute: (), any_name: vec![
1288//! # Choice::One,
1289//! # Choice::Other("text cdata".into()),
1290//! # Choice::Two,
1291//! # Choice::One,
1292//! # ] },
1293//! # quick_xml::de::from_str("\
1294//! # <any-tag attribute='...'>\
1295//! # <one>...</one>\
1296//! # text \
1297//! # <![CDATA[cdata]]>\
1298//! # <two>...</two>\
1299//! # <one>...</one>\
1300//! # </any-tag>"
1301//! # ).unwrap(),
1302//! # );
1303//! ```
1304//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1305//!
1306//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1307//! so you cannot have two adjacent string types in your sequence.
1308//! </div>
1309//! </td>
1310//! </tr>
1311//! </tbody>
1312//! </table>
1313//!
1314//!
1315//! Generate Rust types from XML
1316//! ============================
1317//!
1318//! To speed up the creation of Rust types that represent a given XML file you can
1319//! use the [xml_schema_generator](https://github.com/Thomblin/xml_schema_generator).
1320//! It provides a standalone binary and a Rust library that parses one or more XML files
1321//! and generates a collection of structs that are compatible with quick_xml::de.
1322//!
1323//!
1324//!
1325//! Composition Rules
1326//! =================
1327//!
1328//! The XML format is very different from other formats supported by `serde`.
1329//! One such difference is how data in the serialized form is related to
1330//! the Rust type. Usually each byte in the data can be associated only with
1331//! one field in the data structure. However, XML is an exception.
1332//!
1333//! For example, take this XML:
1334//!
1335//! ```xml
1336//! <any>
1337//! <key attr="value"/>
1338//! </any>
1339//! ```
1340//!
1341//! and try to deserialize it to the struct `AnyName`:
1342//!
1343//! ```no_run
1344//! # use serde::Deserialize;
1345//! #[derive(Deserialize)]
1346//! struct AnyName { // AnyName calls `deserialize_struct` on `<any><key attr="value"/></any>`
1347//! // Used data: ^^^^^^^^^^^^^^^^^^^
1348//! key: Inner, // Inner calls `deserialize_struct` on `<key attr="value"/>`
1349//! // Used data: ^^^^^^^^^^^^
1350//! }
1351//! #[derive(Deserialize)]
1352//! struct Inner {
1353//! #[serde(rename = "@attr")]
1354//! attr: String, // String calls `deserialize_string` on `value`
1355//! // Used data: ^^^^^
1356//! }
1357//! ```
1358//!
1359//! Comments show which methods of a [`Deserializer`] are called by each struct's
1360//! `deserialize` method and which input they see. **Used data** shows what
1361//! content is actually used for deserializing. As you can see, the name of the inner
1362//! `<key>` tag is used both as a map key / outer struct field name and as part
1363//! of the inner struct (although the _value_ of the tag, i.e. `key`, is not used
1364//! by it).
1365//!
1366//!
1367//!
1368//! Enum Representations
1369//! ====================
1370//!
1371//! `quick-xml` represents enums differently in normal fields, `$text` fields and
1372//! `$value` fields. The normal representation is compatible with serde's adjacent
1373//! and internal tags feature -- tags for adjacently and internally tagged enums
1374//! are serialized using [`Serializer::serialize_unit_variant`] and deserialized
1375//! using [`Deserializer::deserialize_enum`].
1376//!
1377//! Use these simple rules to remember how an enum is represented in XML:
1378//! - In a `$value` field the representation is always the same as the top-level representation;
1379//! - In a `$text` field the representation is always the same as in a normal field,
1380//! but the surrounding tags with the field name are removed;
1381//! - In a normal field the representation always contains a tag with the field name.
1382//!
1383//! Normal enum variant
1384//! -------------------
1385//!
1386//! To model an `xs:choice` XML construct use `$value` field.
1387//! To model a top-level `xs:choice` just use the enum type.
1388//!
1389//! |Kind |Top-level and in `$value` field |In normal field |In `$text` field |
1390//! |-------|-----------------------------------------|---------------------|---------------------|
1391//! |Unit |`<Unit/>` |`<field>Unit</field>`|`Unit` |
1392//! |Newtype|`<Newtype>42</Newtype>` |Err(Custom) [^0] |Err(Custom) [^0] |
1393//! |Tuple |`<Tuple>42</Tuple><Tuple>answer</Tuple>` |Err(Custom) [^0] |Err(Custom) [^0] |
1394//! |Struct |`<Struct><q>42</q><a>answer</a></Struct>`|Err(Custom) [^0] |Err(Custom) [^0] |
1395//!
1396//! `$text` enum variant
1397//! --------------------
1398//!
1399//! |Kind |Top-level and in `$value` field |In normal field |In `$text` field |
1400//! |-------|-----------------------------------------|---------------------|---------------------|
1401//! |Unit |_(empty)_ |`<field/>` |_(empty)_ |
1402//! |Newtype|`42` |Err(Custom) [^0] [^1]|Err(Custom) [^0] [^2]|
1403//! |Tuple |`42 answer` |Err(Custom) [^0] [^3]|Err(Custom) [^0] [^4]|
1404//! |Struct |Err(Custom) [^0] |Err(Custom) [^0] |Err(Custom) [^0] |
1405//!
1406//! [^0]: The error is returned by the deserialized type. In the case of a derived implementation a `Custom`
1407//! error will be returned, but a custom deserialize implementation can successfully deserialize
1408//! the value from the string that is passed to it.
1409//!
1410//! [^1]: If this were serialized as `<field>42</field>`, it would be ambiguous during deserialization,
1411//! because it would clash with the `Unit` representation in a normal field.
1412//!
1413//! [^2]: If this were serialized as `42`, it would be ambiguous during deserialization,
1414//! because it would clash with the `Unit` representation in a `$text` field.
1415//!
1416//! [^3]: If this were serialized as `<field>42 answer</field>`, it would be ambiguous during deserialization,
1417//! because it would clash with the `Unit` representation in a normal field.
1418//!
1419//! [^4]: If this were serialized as `42 answer`, it would be ambiguous during deserialization,
1420//! because it would clash with the `Unit` representation in a `$text` field.
1421//!
1422//!
1423//!
1424//! `$text` and `$value` special names
1425//! ==================================
1426//!
1427//! quick-xml supports two special names for fields -- `$text` and `$value`.
1428//! Although they may seem the same, there is a distinction. Two different
1429//! names are required mostly for serialization, because quick-xml should know
1430//! how you want to serialize certain constructs, which could be represented
1431//! through XML in multiple different ways.
1432//!
1433//! The only difference is in how complex types and sequences are serialized.
1434//! If you doubt which one you should select, begin with [`$value`](#value).
1435//!
1436//! ## `$text`
1437//! `$text` is used when you want to write your XML as a text or a CDATA content.
1438//! More formally, field with that name represents simple type definition with
1439//! `{variety} = atomic` or `{variety} = union` whose basic members are all atomic,
1440//! as described in the [specification].
1441//!
1442//! As a result, not all types of such fields can be serialized. Only serialization
1443//! of the following types is supported:
1444//! - all primitive types (strings, numbers, booleans)
1445//! - unit variants of enumerations (serializes to a name of a variant)
1446//! - newtypes (delegates serialization to inner type)
1447//! - [`Option`] of above (`None` serializes to nothing)
1448//! - sequences (including tuples and tuple variants of enumerations) of above,
1449//! excluding `None` and empty string elements (because it will not be possible
1450//! to deserialize them back). The elements are separated by space(s)
1451//! - unit type `()` and unit structs (serializes to nothing)
1452//!
1453//! Complex types, such as structs and maps, are not supported in this field.
1454//! If you want them, you should use `$value`.
1455//!
1456//! Sequences are serialized to a space-delimited string, which is why only certain
1457//! types are allowed in this mode:
1458//!
1459//! ```
1460//! # use serde::{Deserialize, Serialize};
1461//! # use quick_xml::de::from_str;
1462//! # use quick_xml::se::to_string;
1463//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1464//! struct AnyName {
1465//! #[serde(rename = "$text")]
1466//! field: Vec<usize>,
1467//! }
1468//!
1469//! let obj = AnyName { field: vec![1, 2, 3] };
1470//! let xml = to_string(&obj).unwrap();
1471//! assert_eq!(xml, "<AnyName>1 2 3</AnyName>");
1472//!
1473//! let object: AnyName = from_str(&xml).unwrap();
1474//! assert_eq!(object, obj);
1475//! ```
1476//!
1477//! ## `$value`
1478//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1479//!
1480//! NOTE: a name `#content` would better explain the purpose of that field,
1481//! but `$value` is used for compatibility with other XML serde crates, which
1482//! use that name. This will allow you to switch XML crates more smoothly if required.
1483//! </div>
1484//!
1485//! Representation of primitive types in `$value` does not differ from their
1486//! representation in a `$text` field. The difference is how sequences are serialized.
1487//! `$value` serializes each sequence item as a separate XML element. The name
1488//! of that element is taken from the serialized type, and because only `enum`s provide
1489//! such a name (their variant name), only they should be used for such fields.
1490//!
1491//! `$value` fields do not support `struct` types with fields; the serialization
1492//! of such types ends with an `Err(Unsupported)`. Unit structs and the unit
1493//! type `()` are serialized to nothing and can be deserialized from any content.
1494//!
1495//! Serialization and deserialization of a `$value` field is performed as usual, except
1496//! that the name of the XML element is given by the serialized type instead of the
1497//! field. This makes it possible to serialize enumerated types, where the variant is encoded
1498//! as a tag name, and so to represent an XSD `xs:choice` schema by a Rust `enum`.
1499//!
1500//! In the example below, `field` will be serialized as `<field>A</field>`, `<field>B</field>`
1501//! or `<field>C</field>`, because elements get their names from the field name. It cannot be
1502//! deserialized from the elements `<A/>`, `<B/>` or `<C/>`, because `AnyName` looks only for `<field>`:
1503//!
1504//! ```
1505//! # use serde::{Deserialize, Serialize};
1506//! # use pretty_assertions::assert_eq;
1507//! # #[derive(PartialEq, Debug)]
1508//! #[derive(Deserialize, Serialize)]
1509//! enum Enum { A, B, C }
1510//!
1511//! # #[derive(PartialEq, Debug)]
1512//! #[derive(Deserialize, Serialize)]
1513//! struct AnyName {
1514//! // <field>A</field>, <field>B</field>, or <field>C</field>
1515//! field: Enum,
1516//! }
1517//! # assert_eq!(
1518//! # quick_xml::se::to_string(&AnyName { field: Enum::A }).unwrap(),
1519//! # "<AnyName><field>A</field></AnyName>",
1520//! # );
1521//! # assert_eq!(
1522//! # AnyName { field: Enum::B },
1523//! # quick_xml::de::from_str("<root><field>B</field></root>").unwrap(),
1524//! # );
1525//! ```
1526//!
1527//! If you rename the field to `$value`, then `field` will be serialized as `<A/>`,
1528//! `<B/>` or `<C/>`, depending on its content. It is also possible to
1529//! deserialize it from the same elements:
1530//!
1531//! ```
1532//! # use serde::{Deserialize, Serialize};
1533//! # use pretty_assertions::assert_eq;
1534//! # #[derive(Deserialize, Serialize, PartialEq, Debug)]
1535//! # enum Enum { A, B, C }
1536//! #
1537//! # #[derive(PartialEq, Debug)]
1538//! #[derive(Deserialize, Serialize)]
1539//! struct AnyName {
1540//! // <A/>, <B/> or <C/>
1541//! #[serde(rename = "$value")]
1542//! field: Enum,
1543//! }
1544//! # assert_eq!(
1545//! # quick_xml::se::to_string(&AnyName { field: Enum::A }).unwrap(),
1546//! # "<AnyName><A/></AnyName>",
1547//! # );
1548//! # assert_eq!(
1549//! # AnyName { field: Enum::B },
1550//! # quick_xml::de::from_str("<root><B/></root>").unwrap(),
1551//! # );
1552//! ```
1553//!
1554//! ### Primitives and sequences of primitives
1555//!
1556//! Sequences are serialized to a list of elements. Note that types that do not
1557//! produce their own tag (i.e. primitives) will produce [`SeError::Unsupported`]
1558//! if the sequence contains more than one element, because such a sequence cannot be
1559//! deserialized to the same value:
1560//!
1561//! ```
1562//! # use serde::{Deserialize, Serialize};
1563//! # use pretty_assertions::assert_eq;
1564//! # use quick_xml::de::from_str;
1565//! # use quick_xml::se::to_string;
1566//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1567//! struct AnyName {
1568//! #[serde(rename = "$value")]
1569//! field: Vec<usize>,
1570//! }
1571//!
1572//! let obj = AnyName { field: vec![1, 2, 3] };
1573//! // If this object were serialized, it would be represented as "<AnyName>123</AnyName>"
1574//! to_string(&obj).unwrap_err();
1575//!
1576//! let object: AnyName = from_str("<AnyName>123</AnyName>").unwrap();
1577//! assert_eq!(object, AnyName { field: vec![123] });
1578//!
1579//! // `1 2 3` is mapped to a single `usize` element
1580//! // It is impossible to deserialize list of primitives to such field
1581//! from_str::<AnyName>("<AnyName>1 2 3</AnyName>").unwrap_err();
1582//! ```
1583//!
1584//! A particular case of that example is a string `$value` field, which is probably
1585//! the most used example of that attribute:
1586//!
1587//! ```
1588//! # use serde::{Deserialize, Serialize};
1589//! # use pretty_assertions::assert_eq;
1590//! # use quick_xml::de::from_str;
1591//! # use quick_xml::se::to_string;
1592//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1593//! struct AnyName {
1594//! #[serde(rename = "$value")]
1595//! field: String,
1596//! }
1597//!
1598//! let obj = AnyName { field: "content".to_string() };
1599//! let xml = to_string(&obj).unwrap();
1600//! assert_eq!(xml, "<AnyName>content</AnyName>");
1601//! ```
1602//!
1603//! ### Structs and sequences of structs
1604//!
1605//! Note that structures do not have a serializable name either (the name of the
1606//! type is never used), so it is impossible to serialize a non-unit struct or
1607//! a sequence of non-unit structs in a `$value` field. (Sequences of) unit structs
1608//! are serialized as an empty string, because units themselves are serialized
1609//! to nothing:
1610//!
1611//! ```
1612//! # use serde::{Deserialize, Serialize};
1613//! # use pretty_assertions::assert_eq;
1614//! # use quick_xml::de::from_str;
1615//! # use quick_xml::se::to_string;
1616//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1617//! struct Unit;
1618//!
1619//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1620//! struct AnyName {
1621//! // #[serde(default)] is required for deserialization of empty lists
1622//! // This is a general note, not related to $value
1623//! #[serde(rename = "$value", default)]
1624//! field: Vec<Unit>,
1625//! }
1626//!
1627//! let obj = AnyName { field: vec![Unit, Unit, Unit] };
1628//! let xml = to_string(&obj).unwrap();
1629//! assert_eq!(xml, "<AnyName/>");
1630//!
1631//! let object: AnyName = from_str("<AnyName/>").unwrap();
1632//! assert_eq!(object, AnyName { field: vec![] });
1633//!
1634//! let object: AnyName = from_str("<AnyName></AnyName>").unwrap();
1635//! assert_eq!(object, AnyName { field: vec![] });
1636//!
1637//! let object: AnyName = from_str("<AnyName><A/><B/><C/></AnyName>").unwrap();
1638//! assert_eq!(object, AnyName { field: vec![Unit, Unit, Unit] });
1639//! ```
1640//!
1641//! ### Enums and sequences of enums
1642//!
1643//! Enumerations use the variant name as an element name:
1644//!
1645//! ```
1646//! # use serde::{Deserialize, Serialize};
1647//! # use pretty_assertions::assert_eq;
1648//! # use quick_xml::de::from_str;
1649//! # use quick_xml::se::to_string;
1650//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1651//! struct AnyName {
1652//! #[serde(rename = "$value")]
1653//! field: Vec<Enum>,
1654//! }
1655//!
1656//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1657//! enum Enum { A, B, C }
1658//!
1659//! let obj = AnyName { field: vec![Enum::A, Enum::B, Enum::C] };
1660//! let xml = to_string(&obj).unwrap();
1661//! assert_eq!(
1662//! xml,
1663//! "<AnyName>\
1664//! <A/>\
1665//! <B/>\
1666//! <C/>\
1667//! </AnyName>"
1668//! );
1669//!
1670//! let object: AnyName = from_str(&xml).unwrap();
1671//! assert_eq!(object, obj);
1672//! ```
1673//!
1674//! ----------------------------------------------------------------------------
1675//!
1676//! You can have either `$text` or `$value` field in your structs. Unfortunately,
1677//! that is not enforced, so you can theoretically have both, but you should
1678//! avoid that.
1679//!
1680//!
1681//!
1682//! Frequently Used Patterns
1683//! ========================
1684//!
1685//! Some XML constructs are used so frequently that it is worth documenting the recommended
1686//! way to represent them in Rust. The sections below describe them.
1687//!
1688//! `<element>` lists
1689//! -----------------
1690//! Many XML formats wrap lists of elements in an additional container,
1691//! although this is not required by the XML rules:
1692//!
1693//! ```xml
1694//! <root>
1695//! <field1/>
1696//! <field2/>
1697//! <list><!-- Container -->
1698//! <element/>
1699//! <element/>
1700//! <element/>
1701//! </list>
1702//! <field3/>
1703//! </root>
1704//! ```
1705//! In this case, there is a great desire to describe this XML in this way:
1706//! ```
1707//! /// Represents <element/>
1708//! type Element = ();
1709//!
1710//! /// Represents <root>...</root>
1711//! struct AnyName {
1712//! // Incorrect
1713//! list: Vec<Element>,
1714//! }
1715//! ```
1716//! This will not work, because the `<list>` element can potentially have attributes
1717//! and other elements inside. You should define the struct for the `<list>`
1718//! explicitly, as you would do in the XSD for that XML:
1719//! ```
1720//! /// Represents <element/>
1721//! type Element = ();
1722//!
1723//! /// Represents <root>...</root>
1724//! struct AnyName {
1725//! // Correct
1726//! list: List,
1727//! }
1728//! /// Represents <list>...</list>
1729//! struct List {
1730//! element: Vec<Element>,
1731//! }
1732//! ```
1733//!
1734//! If you want to simplify your API, you could write a simple function for unwrapping
1735//! inner list and apply it via [`deserialize_with`]:
1736//!
1737//! ```
1738//! # use pretty_assertions::assert_eq;
1739//! use quick_xml::de::from_str;
1740//! use serde::{Deserialize, Deserializer};
1741//!
1742//! /// Represents <element/>
1743//! type Element = ();
1744//!
1745//! /// Represents <root>...</root>
1746//! #[derive(Deserialize, Debug, PartialEq)]
1747//! struct AnyName {
1748//! #[serde(deserialize_with = "unwrap_list")]
1749//! list: Vec<Element>,
1750//! }
1751//!
1752//! fn unwrap_list<'de, D>(deserializer: D) -> Result<Vec<Element>, D::Error>
1753//! where
1754//! D: Deserializer<'de>,
1755//! {
1756//! /// Represents <list>...</list>
1757//! #[derive(Deserialize)]
1758//! struct List {
1759//! // default allows empty list
1760//! #[serde(default)]
1761//! element: Vec<Element>,
1762//! }
1763//! Ok(List::deserialize(deserializer)?.element)
1764//! }
1765//!
1766//! assert_eq!(
1767//! AnyName { list: vec![(), (), ()] },
1768//! from_str("
1769//! <root>
1770//! <list>
1771//! <element/>
1772//! <element/>
1773//! <element/>
1774//! </list>
1775//! </root>
1776//! ").unwrap(),
1777//! );
1778//! ```
1779//!
1780//! Instead of writing such functions manually, you also could try <https://lib.rs/crates/serde-query>.
1781//!
1782//! Overlapped (Out-of-Order) Elements
1783//! ----------------------------------
1784//! In the case that the list might contain tags that are overlapped with
1785//! tags that do not correspond to the list (this is a usual case in XML
1786//! documents) like this:
1787//! ```xml
1788//! <any-name>
1789//! <item/>
1790//! <another-item/>
1791//! <item/>
1792//! <item/>
1793//! </any-name>
1794//! ```
1795//! you should enable the [`overlapped-lists`] feature to make it possible
1796//! to deserialize this to:
1797//! ```no_run
1798//! # use serde::Deserialize;
1799//! #[derive(Deserialize)]
1800//! #[serde(rename_all = "kebab-case")]
1801//! struct AnyName {
1802//! item: Vec<()>,
1803//! another_item: (),
1804//! }
1805//! ```
1806//!
1807//!
1808//! Internally Tagged Enums
1809//! -----------------------
1810//! [Tagged enums] are currently not supported because of an issue in the Serde
1811//! design (see [serde#1183] and [quick-xml#586]) and missing optimizations in
1812//! Serde which could be useful for XML parsing ([serde#1495]). This can be worked
1813//! around by manually implementing deserialize with `#[serde(deserialize_with = "func")]`
1814//! or implementing [`Deserialize`], but this can get very tedious very fast for
1815//! files with large amounts of tagged enums. To help with this issue quick-xml
1816//! provides a macro [`impl_deserialize_for_internally_tagged_enum!`]. See the
1817//! macro documentation for details.
1818//!
1819//!
1820//! [`overlapped-lists`]: ../index.html#overlapped-lists
1821//! [specification]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition
1822//! [`deserialize_with`]: https://serde.rs/field-attrs.html#deserialize_with
1823//! [#497]: https://github.com/tafia/quick-xml/issues/497
1824//! [`Serializer::serialize_unit_variant`]: serde::Serializer::serialize_unit_variant
1825//! [`Deserializer::deserialize_enum`]: serde::Deserializer::deserialize_enum
1826//! [`SeError::Unsupported`]: crate::errors::serialize::SeError::Unsupported
1827//! [Tagged enums]: https://serde.rs/enum-representations.html#internally-tagged
1828//! [serde#1183]: https://github.com/serde-rs/serde/issues/1183
1829//! [serde#1495]: https://github.com/serde-rs/serde/issues/1495
1830//! [quick-xml#586]: https://github.com/tafia/quick-xml/issues/586
1831//! [`impl_deserialize_for_internally_tagged_enum!`]: crate::impl_deserialize_for_internally_tagged_enum
1832
// Macros must be defined before the modules that use them.
// Likewise, macros must be imported before they are used.
1835use serde::serde_if_integer128;
1836
/// Generates a `$deserialize` method that reads the next string from the input
/// and tries to parse it as the number type accepted by `$visit`.
///
/// When the text cannot be parsed, it is handed to the visitor as a string
/// instead (`visit_str` for borrowed text, `visit_string` for owned text).
///
/// The optional `$mut` token is placed before `self` in the generated method.
macro_rules! deserialize_num {
    ($deserialize:ident => $visit:ident, $($mut:tt)?) => {
        fn $deserialize<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            // The text is parsed as-is: a valid number representation never
            // contains characters that have to be escaped
            let raw = self.read_string()?;
            if let Ok(number) = raw.parse() {
                return visitor.$visit(number);
            }
            // Not a number: let the visitor decide what to do with the text
            match raw {
                Cow::Borrowed(s) => visitor.visit_str(s),
                Cow::Owned(s) => visitor.visit_string(s),
            }
        }
    };
}
1855
/// Generates the deserialization methods for scalar types (numbers, booleans,
/// characters, strings, byte arrays and identifiers), together with the
/// methods that merely forward to another `deserialize_*` method.
///
/// The optional `$mut` token is placed before `self` in the methods that read
/// a string from the input via `self.read_string()`.
macro_rules! deserialize_primitives {
    ($($mut:tt)?) => {
        // Signed integers
        deserialize_num!(deserialize_i8 => visit_i8, $($mut)?);
        deserialize_num!(deserialize_i16 => visit_i16, $($mut)?);
        deserialize_num!(deserialize_i32 => visit_i32, $($mut)?);
        deserialize_num!(deserialize_i64 => visit_i64, $($mut)?);

        // Unsigned integers
        deserialize_num!(deserialize_u8 => visit_u8, $($mut)?);
        deserialize_num!(deserialize_u16 => visit_u16, $($mut)?);
        deserialize_num!(deserialize_u32 => visit_u32, $($mut)?);
        deserialize_num!(deserialize_u64 => visit_u64, $($mut)?);

        // 128-bit integers
        serde_if_integer128! {
            deserialize_num!(deserialize_i128 => visit_i128, $($mut)?);
            deserialize_num!(deserialize_u128 => visit_u128, $($mut)?);
        }

        // Floating point numbers
        deserialize_num!(deserialize_f32 => visit_f32, $($mut)?);
        deserialize_num!(deserialize_f64 => visit_f64, $($mut)?);

        /// Reads a string from the input and delegates its interpretation
        /// as a boolean to `CowRef::deserialize_bool`.
        fn deserialize_bool<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            let value = match self.read_string()? {
                Cow::Borrowed(borrowed) => CowRef::Input(borrowed),
                Cow::Owned(owned) => CowRef::Owned(owned),
            };
            value.deserialize_bool(visitor)
        }

        /// A character is represented the same way as a [string](#method.deserialize_str).
        #[inline]
        fn deserialize_char<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        /// Reads a string from the input and passes it to the visitor:
        /// borrowed from the input when possible, owned otherwise.
        fn deserialize_str<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            match self.read_string()? {
                Cow::Borrowed(borrowed) => visitor.visit_borrowed_str(borrowed),
                Cow::Owned(owned) => visitor.visit_string(owned),
            }
        }

        /// An owned string is represented the same way as a [non-owned](#method.deserialize_str) one.
        #[inline]
        fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        /// Forwards deserialization to [`deserialize_any`](#method.deserialize_any).
        #[inline]
        fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_any(visitor)
        }

        /// Forwards deserialization to [`deserialize_bytes`](#method.deserialize_bytes).
        #[inline]
        fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_bytes(visitor)
        }

        /// A named unit is represented the same way as an [unnamed unit](#method.deserialize_unit).
        /// The name is ignored.
        #[inline]
        fn deserialize_unit_struct<V>(
            self,
            _name: &'static str,
            visitor: V,
        ) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_unit(visitor)
        }

        /// A tuple is represented the same way as a [sequence](#method.deserialize_seq).
        /// The length is ignored.
        #[inline]
        fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_seq(visitor)
        }

        /// A named tuple is represented the same way as an [unnamed tuple](#method.deserialize_tuple).
        /// The name is ignored.
        #[inline]
        fn deserialize_tuple_struct<V>(
            self,
            _name: &'static str,
            len: usize,
            visitor: V,
        ) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_tuple(len, visitor)
        }

        /// Forwards deserialization to [`deserialize_struct`](#method.deserialize_struct)
        /// with an empty name and an empty list of fields.
        #[inline]
        fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_struct("", &[], visitor)
        }

        /// An identifier is represented the same way as a [string](#method.deserialize_str).
        #[inline]
        fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        /// Forwards deserialization to [`deserialize_unit`](#method.deserialize_unit).
        #[inline]
        fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_unit(visitor)
        }
    };
}
2001
2002mod key;
2003mod map;
2004mod resolver;
2005mod simple_type;
2006mod text;
2007mod var;
2008
2009pub use self::resolver::{EntityResolver, PredefinedEntityResolver};
2010pub use self::simple_type::SimpleTypeDeserializer;
2011pub use crate::errors::serialize::DeError;
2012
2013use crate::{
2014 de::map::ElementMapAccess,
2015 encoding::Decoder,
2016 errors::Error,
2017 events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
2018 name::QName,
2019 reader::Reader,
2020 utils::CowRef,
2021};
2022use serde::de::{
2023 self, Deserialize, DeserializeOwned, DeserializeSeed, IntoDeserializer, SeqAccess, Visitor,
2024};
2025use std::borrow::Cow;
2026#[cfg(feature = "overlapped-lists")]
2027use std::collections::VecDeque;
2028use std::io::BufRead;
2029use std::mem::replace;
2030#[cfg(feature = "overlapped-lists")]
2031use std::num::NonZeroUsize;
2032use std::ops::Deref;
2033
/// Special name (`$text`) for data represented by a text node or a CDATA node.
/// XML markup is not expected.
pub(crate) const TEXT_KEY: &str = "$text";
/// Special name (`$value`) for data represented by any XML markup inside.
pub(crate) const VALUE_KEY: &str = "$value";
2038
/// Decoded and concatenated content of consecutive [`Text`] and [`CData`]
/// events. _Consecutive_ means that the events follow each other directly or
/// are separated only by (any number of) [`Comment`] or [`PI`] events.
///
/// Internally the text is stored in a `Cow<str>`, so cloning is cheap while the
/// text is borrowed and copies the data when the text is owned.
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
/// [`Comment`]: Event::Comment
/// [`PI`]: Event::PI
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Text<'a> {
    text: Cow<'a, str>,
}

/// Gives read-only access to the content as a plain `str`.
impl<'a> Deref for Text<'a> {
    type Target = str;

    #[inline]
    fn deref(&self) -> &str {
        &self.text
    }
}

/// Wraps a string slice without copying it.
impl<'a> From<&'a str> for Text<'a> {
    #[inline]
    fn from(text: &'a str) -> Self {
        Text {
            text: Cow::from(text),
        }
    }
}

/// Takes ownership of an already allocated string.
impl<'a> From<String> for Text<'a> {
    #[inline]
    fn from(text: String) -> Self {
        Text {
            text: Cow::from(text),
        }
    }
}

/// Wraps a `Cow` as-is, keeping its borrowed or owned state.
impl<'a> From<Cow<'a, str>> for Text<'a> {
    #[inline]
    fn from(text: Cow<'a, str>) -> Self {
        Text { text }
    }
}
2088
2089////////////////////////////////////////////////////////////////////////////////////////////////////
2090
/// Simplified event that contains only those variants that are used by the
/// deserializer.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum DeEvent<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Decoded and concatenated content of consecutive [`Text`] and [`CData`]
    /// events. _Consecutive_ means that the events follow each other directly
    /// or are separated only by (any number of) [`Comment`] or [`PI`] events.
    ///
    /// [`Text`]: Event::Text
    /// [`CData`]: Event::CData
    /// [`Comment`]: Event::Comment
    /// [`PI`]: Event::PI
    Text(Text<'a>),
    /// End of XML document.
    Eof,
}
2110
2111////////////////////////////////////////////////////////////////////////////////////////////////////
2112
/// Simplified event that contains only those variants that are used by the
/// deserializer, but in which [`Text`] events are not yet fully processed.
///
/// [`Text`] events should be trimmed if they are not surrounded by other
/// [`Text`] or [`CData`] events. This event holds the intermediate state of a
/// [`Text`] event, which is already trimmed at the start, but not yet at the
/// end. To trim the trailing spaces we have to look ahead by one deserializer
/// event (i.e. skip all comments and processing instructions).
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum PayloadEvent<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Escaped character data between tags.
    Text(BytesText<'a>),
    /// Unescaped character data stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
    DocType(BytesText<'a>),
    /// End of XML document.
    Eof,
}
2139
2140impl<'a> PayloadEvent<'a> {
2141 /// Ensures that all data is owned to extend the object's lifetime if necessary.
2142 #[inline]
2143 fn into_owned(self) -> PayloadEvent<'static> {
2144 match self {
2145 PayloadEvent::Start(e) => PayloadEvent::Start(e.into_owned()),
2146 PayloadEvent::End(e) => PayloadEvent::End(e.into_owned()),
2147 PayloadEvent::Text(e) => PayloadEvent::Text(e.into_owned()),
2148 PayloadEvent::CData(e) => PayloadEvent::CData(e.into_owned()),
2149 PayloadEvent::DocType(e) => PayloadEvent::DocType(e.into_owned()),
2150 PayloadEvent::Eof => PayloadEvent::Eof,
2151 }
2152 }
2153}
2154
/// An intermediate reader that consumes [`PayloadEvent`]s and produces final
/// [`DeEvent`]s. [`PayloadEvent::Text`] events that are followed by any event
/// except [`PayloadEvent::Text`] or [`PayloadEvent::CData`] are trimmed from
/// the end.
struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = PredefinedEntityResolver> {
    /// A source of low-level XML events
    reader: R,
    /// Intermediate event that could be returned by the next call to `next()`.
    /// If it is a `Text` event, then its leading spaces are already trimmed,
    /// but its trailing spaces are not. Before the event is returned, trimming
    /// of the trailing spaces could be necessary
    lookahead: Result<PayloadEvent<'i>, DeError>,

    /// Used to resolve unknown entities that would otherwise cause the parser
    /// to return an [`EscapeError::UnrecognizedEntity`] error.
    ///
    /// [`EscapeError::UnrecognizedEntity`]: crate::escape::EscapeError::UnrecognizedEntity
    entity_resolver: E,
}
2173
2174impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
2175 fn new(mut reader: R, entity_resolver: E) -> Self {
2176 // Lookahead by one event immediately, so we do not need to check in the
2177 // loop if we need lookahead or not
2178 let lookahead = reader.next();
2179
2180 Self {
2181 reader,
2182 lookahead,
2183 entity_resolver,
2184 }
2185 }
2186
2187 /// Returns `true` if all events was consumed
2188 const fn is_empty(&self) -> bool {
2189 matches!(self.lookahead, Ok(PayloadEvent::Eof))
2190 }
2191
2192 /// Read next event and put it in lookahead, return the current lookahead
2193 #[inline(always)]
2194 fn next_impl(&mut self) -> Result<PayloadEvent<'i>, DeError> {
2195 replace(&mut self.lookahead, self.reader.next())
2196 }
2197
2198 /// Returns `true` when next event is not a text event in any form.
2199 #[inline(always)]
2200 const fn current_event_is_last_text(&self) -> bool {
2201 // If next event is a text or CDATA, we should not trim trailing spaces
2202 !matches!(
2203 self.lookahead,
2204 Ok(PayloadEvent::Text(_)) | Ok(PayloadEvent::CData(_))
2205 )
2206 }
2207
2208 /// Read all consequent [`Text`] and [`CData`] events until non-text event
2209 /// occurs. Content of all events would be appended to `result` and returned
2210 /// as [`DeEvent::Text`].
2211 ///
2212 /// [`Text`]: PayloadEvent::Text
2213 /// [`CData`]: PayloadEvent::CData
2214 fn drain_text(&mut self, mut result: Cow<'i, str>) -> Result<DeEvent<'i>, DeError> {
2215 loop {
2216 if self.current_event_is_last_text() {
2217 break;
2218 }
2219
2220 match self.next_impl()? {
2221 PayloadEvent::Text(mut e) => {
2222 if self.current_event_is_last_text() {
2223 // FIXME: Actually, we should trim after decoding text, but now we trim before
2224 e.inplace_trim_end();
2225 }
2226 result
2227 .to_mut()
2228 .push_str(&e.unescape_with(|entity| self.entity_resolver.resolve(entity))?);
2229 }
2230 PayloadEvent::CData(e) => result.to_mut().push_str(&e.decode()?),
2231
2232 // SAFETY: current_event_is_last_text checks that event is Text or CData
2233 _ => unreachable!("Only `Text` and `CData` events can come here"),
2234 }
2235 }
2236 Ok(DeEvent::Text(Text { text: result }))
2237 }
2238
2239 /// Return an input-borrowing event.
2240 fn next(&mut self) -> Result<DeEvent<'i>, DeError> {
2241 loop {
2242 return match self.next_impl()? {
2243 PayloadEvent::Start(e) => Ok(DeEvent::Start(e)),
2244 PayloadEvent::End(e) => Ok(DeEvent::End(e)),
2245 PayloadEvent::Text(mut e) => {
2246 if self.current_event_is_last_text() && e.inplace_trim_end() {
2247 // FIXME: Actually, we should trim after decoding text, but now we trim before
2248 continue;
2249 }
2250 self.drain_text(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
2251 }
2252 PayloadEvent::CData(e) => self.drain_text(e.decode()?),
2253 PayloadEvent::DocType(e) => {
2254 self.entity_resolver
2255 .capture(e)
2256 .map_err(|err| DeError::Custom(format!("cannot parse DTD: {}", err)))?;
2257 continue;
2258 }
2259 PayloadEvent::Eof => Ok(DeEvent::Eof),
2260 };
2261 }
2262 }
2263
2264 #[inline]
2265 fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
2266 match self.lookahead {
2267 // We pre-read event with the same name that is required to be skipped.
2268 // First call of `read_to_end` will end out pre-read event, the second
2269 // will consume other events
2270 Ok(PayloadEvent::Start(ref e)) if e.name() == name => {
2271 let result1 = self.reader.read_to_end(name);
2272 let result2 = self.reader.read_to_end(name);
2273
2274 // In case of error `next_impl` returns `Eof`
2275 let _ = self.next_impl();
2276 result1?;
2277 result2?;
2278 }
2279 // We pre-read event with the same name that is required to be skipped.
2280 // Because this is end event, we already consume the whole tree, so
2281 // nothing to do, just update lookahead
2282 Ok(PayloadEvent::End(ref e)) if e.name() == name => {
2283 let _ = self.next_impl();
2284 }
2285 Ok(_) => {
2286 let result = self.reader.read_to_end(name);
2287
2288 // In case of error `next_impl` returns `Eof`
2289 let _ = self.next_impl();
2290 result?;
2291 }
2292 // Read next lookahead event, unpack error from the current lookahead
2293 Err(_) => {
2294 self.next_impl()?;
2295 }
2296 }
2297 Ok(())
2298 }
2299
2300 #[inline]
2301 fn decoder(&self) -> Decoder {
2302 self.reader.decoder()
2303 }
2304}
2305
2306////////////////////////////////////////////////////////////////////////////////////////////////////
2307
2308/// Deserialize an instance of type `T` from a string of XML text.
2309pub fn from_str<'de, T>(s: &'de str) -> Result<T, DeError>
2310where
2311 T: Deserialize<'de>,
2312{
2313 let mut de = Deserializer::from_str(s);
2314 T::deserialize(&mut de)
2315}
2316
2317/// Deserialize from a reader. This method will do internal copies of data
2318/// read from `reader`. If you want have a `&str` input and want to borrow
2319/// as much as possible, use [`from_str`].
2320pub fn from_reader<R, T>(reader: R) -> Result<T, DeError>
2321where
2322 R: BufRead,
2323 T: DeserializeOwned,
2324{
2325 let mut de = Deserializer::from_reader(reader);
2326 T::deserialize(&mut de)
2327}
2328
2329////////////////////////////////////////////////////////////////////////////////////////////////////
2330
/// A structure that deserializes XML into Rust values.
pub struct Deserializer<'de, R, E: EntityResolver = PredefinedEntityResolver>
where
    R: XmlRead<'de>,
{
    /// An XML reader that streams events into this deserializer
    reader: XmlReader<'de, R, E>,

    /// When deserializing sequences we sometimes have to skip unwanted events.
    /// Those events are stored and replayed later. This is the replay buffer,
    /// which supplies events while it is not empty. When it is exhausted,
    /// events are requested from [`Self::reader`].
    #[cfg(feature = "overlapped-lists")]
    read: VecDeque<DeEvent<'de>>,
    /// When deserializing sequences we sometimes have to skip events, because
    /// XML is tolerant to the order of elements: even if the order is strictly
    /// specified in the XSD (using `xs:sequence`), most XML parsers allow order
    /// violations. That means that the elements that form a sequence could be
    /// interleaved with other elements that are not related to that sequence.
    ///
    /// In order to support this, the deserializer scans events, skips the
    /// unwanted ones and stores them here. After a call to
    /// [`Self::start_replay()`] all events are moved from this buffer to
    /// [`Self::read`].
    #[cfg(feature = "overlapped-lists")]
    write: VecDeque<DeEvent<'de>>,
    /// Maximum number of events that can be skipped when processing sequences
    /// that occur out-of-order. This field is used to prevent potential
    /// denial-of-service (DoS) attacks which could cause infinite memory
    /// consumption when parsing a very large amount of XML into a sequence field.
    #[cfg(feature = "overlapped-lists")]
    limit: Option<NonZeroUsize>,

    /// Event that was already read from [`Self::reader`] by `peek()`, but not
    /// yet consumed by `next()`.
    #[cfg(not(feature = "overlapped-lists"))]
    peek: Option<DeEvent<'de>>,

    /// Buffer to store attribute name as a field name exposed to serde consumers
    key_buf: String,
}
2369
2370impl<'de, R, E> Deserializer<'de, R, E>
2371where
2372 R: XmlRead<'de>,
2373 E: EntityResolver,
2374{
2375 /// Create an XML deserializer from one of the possible quick_xml input sources.
2376 ///
2377 /// Typically it is more convenient to use one of these methods instead:
2378 ///
2379 /// - [`Deserializer::from_str`]
2380 /// - [`Deserializer::from_reader`]
2381 fn new(reader: R, entity_resolver: E) -> Self {
2382 Self {
2383 reader: XmlReader::new(reader, entity_resolver),
2384
2385 #[cfg(feature = "overlapped-lists")]
2386 read: VecDeque::new(),
2387 #[cfg(feature = "overlapped-lists")]
2388 write: VecDeque::new(),
2389 #[cfg(feature = "overlapped-lists")]
2390 limit: None,
2391
2392 #[cfg(not(feature = "overlapped-lists"))]
2393 peek: None,
2394
2395 key_buf: String::new(),
2396 }
2397 }
2398
2399 /// Returns `true` if all events was consumed.
2400 pub fn is_empty(&self) -> bool {
2401 #[cfg(feature = "overlapped-lists")]
2402 if self.read.is_empty() {
2403 return self.reader.is_empty();
2404 }
2405 #[cfg(not(feature = "overlapped-lists"))]
2406 if self.peek.is_none() {
2407 return self.reader.is_empty();
2408 }
2409 false
2410 }
2411
2412 /// Returns the underlying XML reader.
2413 ///
2414 /// ```
2415 /// # use pretty_assertions::assert_eq;
2416 /// use serde::Deserialize;
2417 /// use quick_xml::de::Deserializer;
2418 /// use quick_xml::Reader;
2419 ///
2420 /// #[derive(Deserialize)]
2421 /// struct SomeStruct {
2422 /// field1: String,
2423 /// field2: String,
2424 /// }
2425 ///
2426 /// // Try to deserialize from broken XML
2427 /// let mut de = Deserializer::from_str(
2428 /// "<SomeStruct><field1><field2></SomeStruct>"
2429 /// // 0 ^= 28 ^= 41
2430 /// );
2431 ///
2432 /// let err = SomeStruct::deserialize(&mut de);
2433 /// assert!(err.is_err());
2434 ///
2435 /// let reader: &Reader<_> = de.get_ref().get_ref();
2436 ///
2437 /// assert_eq!(reader.error_position(), 28);
2438 /// assert_eq!(reader.buffer_position(), 41);
2439 /// ```
2440 pub const fn get_ref(&self) -> &R {
2441 &self.reader.reader
2442 }
2443
2444 /// Set the maximum number of events that could be skipped during deserialization
2445 /// of sequences.
2446 ///
2447 /// If `<element>` contains more than specified nested elements, `$text` or
2448 /// CDATA nodes, then [`DeError::TooManyEvents`] will be returned during
2449 /// deserialization of sequence field (any type that uses [`deserialize_seq`]
2450 /// for the deserialization, for example, `Vec<T>`).
2451 ///
2452 /// This method can be used to prevent a [DoS] attack and infinite memory
2453 /// consumption when parsing a very large XML to a sequence field.
2454 ///
2455 /// It is strongly recommended to set limit to some value when you parse data
2456 /// from untrusted sources. You should choose a value that your typical XMLs
2457 /// can have _between_ different elements that corresponds to the same sequence.
2458 ///
2459 /// # Examples
2460 ///
2461 /// Let's imagine, that we deserialize such structure:
2462 /// ```
2463 /// struct List {
2464 /// item: Vec<()>,
2465 /// }
2466 /// ```
2467 ///
2468 /// The XML that we try to parse look like this:
2469 /// ```xml
2470 /// <any-name>
2471 /// <item/>
2472 /// <!-- Bufferization starts at this point -->
2473 /// <another-item>
2474 /// <some-element>with text</some-element>
2475 /// <yet-another-element/>
2476 /// </another-item>
2477 /// <!-- Buffer will be emptied at this point; 7 events were buffered -->
2478 /// <item/>
2479 /// <!-- There is nothing to buffer, because elements follows each other -->
2480 /// <item/>
2481 /// </any-name>
2482 /// ```
2483 ///
2484 /// There, when we deserialize the `item` field, we need to buffer 7 events,
2485 /// before we can deserialize the second `<item/>`:
2486 ///
2487 /// - `<another-item>`
2488 /// - `<some-element>`
2489 /// - `$text(with text)`
2490 /// - `</some-element>`
2491 /// - `<yet-another-element/>` (virtual start event)
2492 /// - `<yet-another-element/>` (virtual end event)
2493 /// - `</another-item>`
2494 ///
2495 /// Note, that `<yet-another-element/>` internally represented as 2 events:
2496 /// one for the start tag and one for the end tag. In the future this can be
2497 /// eliminated, but for now we use [auto-expanding feature] of a reader,
2498 /// because this simplifies deserializer code.
2499 ///
2500 /// [`deserialize_seq`]: serde::Deserializer::deserialize_seq
2501 /// [DoS]: https://en.wikipedia.org/wiki/Denial-of-service_attack
2502 /// [auto-expanding feature]: crate::reader::Config::expand_empty_elements
2503 #[cfg(feature = "overlapped-lists")]
2504 pub fn event_buffer_size(&mut self, limit: Option<NonZeroUsize>) -> &mut Self {
2505 self.limit = limit;
2506 self
2507 }
2508
2509 #[cfg(feature = "overlapped-lists")]
2510 fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
2511 if self.read.is_empty() {
2512 self.read.push_front(self.reader.next()?);
2513 }
2514 if let Some(event) = self.read.front() {
2515 return Ok(event);
2516 }
2517 // SAFETY: `self.read` was filled in the code above.
2518 // NOTE: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
2519 // if unsafe code will be allowed
2520 unreachable!()
2521 }
2522 #[cfg(not(feature = "overlapped-lists"))]
2523 fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
2524 if self.peek.is_none() {
2525 self.peek = Some(self.reader.next()?);
2526 }
2527 match self.peek.as_ref() {
2528 Some(v) => Ok(v),
2529 // SAFETY: a `None` variant for `self.peek` would have been replaced
2530 // by a `Some` variant in the code above.
2531 // TODO: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
2532 // if unsafe code will be allowed
2533 None => unreachable!(),
2534 }
2535 }
2536
2537 fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
2538 // Replay skipped or peeked events
2539 #[cfg(feature = "overlapped-lists")]
2540 if let Some(event) = self.read.pop_front() {
2541 return Ok(event);
2542 }
2543 #[cfg(not(feature = "overlapped-lists"))]
2544 if let Some(e) = self.peek.take() {
2545 return Ok(e);
2546 }
2547 self.reader.next()
2548 }
2549
2550 /// Returns the mark after which all events, skipped by [`Self::skip()`] call,
2551 /// should be replayed after calling [`Self::start_replay()`].
2552 #[cfg(feature = "overlapped-lists")]
2553 #[inline]
2554 #[must_use = "returned checkpoint should be used in `start_replay`"]
2555 fn skip_checkpoint(&self) -> usize {
2556 self.write.len()
2557 }
2558
2559 /// Extracts XML tree of events from and stores them in the skipped events
2560 /// buffer from which they can be retrieved later. You MUST call
2561 /// [`Self::start_replay()`] after calling this to give access to the skipped
2562 /// events and release internal buffers.
2563 #[cfg(feature = "overlapped-lists")]
2564 fn skip(&mut self) -> Result<(), DeError> {
2565 let event = self.next()?;
2566 self.skip_event(event)?;
2567 match self.write.back() {
2568 // Skip all subtree, if we skip a start event
2569 Some(DeEvent::Start(e)) => {
2570 let end = e.name().as_ref().to_owned();
2571 let mut depth = 0;
2572 loop {
2573 let event = self.next()?;
2574 match event {
2575 DeEvent::Start(ref e) if e.name().as_ref() == end => {
2576 self.skip_event(event)?;
2577 depth += 1;
2578 }
2579 DeEvent::End(ref e) if e.name().as_ref() == end => {
2580 self.skip_event(event)?;
2581 if depth == 0 {
2582 break;
2583 }
2584 depth -= 1;
2585 }
2586 DeEvent::Eof => {
2587 self.skip_event(event)?;
2588 break;
2589 }
2590 _ => self.skip_event(event)?,
2591 }
2592 }
2593 }
2594 _ => (),
2595 }
2596 Ok(())
2597 }
2598
2599 #[cfg(feature = "overlapped-lists")]
2600 #[inline]
2601 fn skip_event(&mut self, event: DeEvent<'de>) -> Result<(), DeError> {
2602 if let Some(max) = self.limit {
2603 if self.write.len() >= max.get() {
2604 return Err(DeError::TooManyEvents(max));
2605 }
2606 }
2607 self.write.push_back(event);
2608 Ok(())
2609 }
2610
2611 /// Moves buffered events, skipped after given `checkpoint` from [`Self::write`]
2612 /// skip buffer to [`Self::read`] buffer.
2613 ///
2614 /// After calling this method, [`Self::peek()`] and [`Self::next()`] starts
2615 /// return events that was skipped previously by calling [`Self::skip()`],
2616 /// and only when all that events will be consumed, the deserializer starts
2617 /// to drain events from underlying reader.
2618 ///
2619 /// This method MUST be called if any number of [`Self::skip()`] was called
2620 /// after [`Self::new()`] or `start_replay()` or you'll lost events.
2621 #[cfg(feature = "overlapped-lists")]
2622 fn start_replay(&mut self, checkpoint: usize) {
2623 if checkpoint == 0 {
2624 self.write.append(&mut self.read);
2625 std::mem::swap(&mut self.read, &mut self.write);
2626 } else {
2627 let mut read = self.write.split_off(checkpoint);
2628 read.append(&mut self.read);
2629 self.read = read;
2630 }
2631 }
2632
2633 #[inline]
2634 fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
2635 self.read_string_impl(true)
2636 }
2637
2638 /// Consumes consequent [`Text`] and [`CData`] (both a referred below as a _text_)
2639 /// events, merge them into one string. If there are no such events, returns
2640 /// an empty string.
2641 ///
2642 /// If `allow_start` is `false`, then only text events are consumed, for other
2643 /// events an error is returned (see table below).
2644 ///
2645 /// If `allow_start` is `true`, then two or three events are expected:
2646 /// - [`DeEvent::Start`];
2647 /// - _(optional)_ [`DeEvent::Text`] which content is returned;
2648 /// - [`DeEvent::End`]. If text event was missed, an empty string is returned.
2649 ///
2650 /// Corresponding events are consumed.
2651 ///
2652 /// # Handling events
2653 ///
2654 /// The table below shows how events is handled by this method:
2655 ///
2656 /// |Event |XML |Handling
2657 /// |------------------|---------------------------|----------------------------------------
2658 /// |[`DeEvent::Start`]|`<tag>...</tag>` |if `allow_start == true`, result determined by the second table, otherwise emits [`UnexpectedStart("tag")`](DeError::UnexpectedStart)
2659 /// |[`DeEvent::End`] |`</any-tag>` |This is impossible situation, the method will panic if it happens
2660 /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged
2661 /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
2662 ///
2663 /// Second event, consumed if [`DeEvent::Start`] was received and `allow_start == true`:
2664 ///
2665 /// |Event |XML |Handling
2666 /// |------------------|---------------------------|----------------------------------------------------------------------------------
2667 /// |[`DeEvent::Start`]|`<any-tag>...</any-tag>` |Emits [`UnexpectedStart("any-tag")`](DeError::UnexpectedStart)
2668 /// |[`DeEvent::End`] |`</tag>` |Returns an empty slice. The reader guarantee that tag will match the open one
2669 /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged, expects the `</tag>` after that
2670 /// |[`DeEvent::Eof`] | |Emits [`InvalidXml(IllFormed(MissingEndTag))`](DeError::InvalidXml)
2671 ///
2672 /// [`Text`]: Event::Text
2673 /// [`CData`]: Event::CData
2674 fn read_string_impl(&mut self, allow_start: bool) -> Result<Cow<'de, str>, DeError> {
2675 match self.next()? {
2676 DeEvent::Text(e) => Ok(e.text),
2677 // allow one nested level
2678 DeEvent::Start(e) if allow_start => self.read_text(e.name()),
2679 DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
2680 // SAFETY: The reader is guaranteed that we don't have unmatched tags
2681 // If we here, then out deserializer has a bug
2682 DeEvent::End(e) => unreachable!("{:?}", e),
2683 DeEvent::Eof => Err(DeError::UnexpectedEof),
2684 }
2685 }
2686 /// Consumes one [`DeEvent::Text`] event and ensures that it is followed by the
2687 /// [`DeEvent::End`] event.
2688 ///
2689 /// # Parameters
2690 /// - `name`: name of a tag opened before reading text. The corresponding end tag
2691 /// should present in input just after the text
2692 fn read_text(&mut self, name: QName) -> Result<Cow<'de, str>, DeError> {
2693 match self.next()? {
2694 DeEvent::Text(e) => match self.next()? {
2695 // The matching tag name is guaranteed by the reader
2696 DeEvent::End(_) => Ok(e.text),
2697 // SAFETY: Cannot be two consequent Text events, they would be merged into one
2698 DeEvent::Text(_) => unreachable!(),
2699 DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
2700 DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()),
2701 },
2702 // We can get End event in case of `<tag></tag>` or `<tag/>` input
2703 // Return empty text in that case
2704 // The matching tag name is guaranteed by the reader
2705 DeEvent::End(_) => Ok("".into()),
2706 DeEvent::Start(s) => Err(DeError::UnexpectedStart(s.name().as_ref().to_owned())),
2707 DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()),
2708 }
2709 }
2710
2711 /// Drops all events until event with [name](BytesEnd::name()) `name` won't be
2712 /// dropped. This method should be called after [`Self::next()`]
2713 #[cfg(feature = "overlapped-lists")]
2714 fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
2715 let mut depth = 0;
2716 loop {
2717 match self.read.pop_front() {
2718 Some(DeEvent::Start(e)) if e.name() == name => {
2719 depth += 1;
2720 }
2721 Some(DeEvent::End(e)) if e.name() == name => {
2722 if depth == 0 {
2723 break;
2724 }
2725 depth -= 1;
2726 }
2727
2728 // Drop all other skipped events
2729 Some(_) => continue,
2730
2731 // If we do not have skipped events, use effective reading that will
2732 // not allocate memory for events
2733 None => {
2734 // We should close all opened tags, because we could buffer
2735 // Start events, but not the corresponding End events. So we
2736 // keep reading events until we exit all nested tags.
2737 // `read_to_end()` will return an error if an Eof was encountered
2738 // preliminary (in case of malformed XML).
2739 //
2740 // <tag><tag></tag></tag>
2741 // ^^^^^^^^^^ - buffered in `self.read`, when `self.read_to_end()` is called, depth = 2
2742 // ^^^^^^ - read by the first call of `self.reader.read_to_end()`
2743 // ^^^^^^ - read by the second call of `self.reader.read_to_end()`
2744 loop {
2745 self.reader.read_to_end(name)?;
2746 if depth == 0 {
2747 break;
2748 }
2749 depth -= 1;
2750 }
2751 break;
2752 }
2753 }
2754 }
2755 Ok(())
2756 }
2757 #[cfg(not(feature = "overlapped-lists"))]
2758 fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
2759 // First one might be in self.peek
2760 match self.next()? {
2761 DeEvent::Start(e) => self.reader.read_to_end(e.name())?,
2762 DeEvent::End(e) if e.name() == name => return Ok(()),
2763 _ => (),
2764 }
2765 self.reader.read_to_end(name)
2766 }
2767}
2768
2769impl<'de> Deserializer<'de, SliceReader<'de>> {
2770 /// Create new deserializer that will borrow data from the specified string.
2771 ///
2772 /// Deserializer created with this method will not resolve custom entities.
2773 #[allow(clippy::should_implement_trait)]
2774 pub fn from_str(source: &'de str) -> Self {
2775 Self::from_str_with_resolver(source, PredefinedEntityResolver)
2776 }
2777}
2778
2779impl<'de, E> Deserializer<'de, SliceReader<'de>, E>
2780where
2781 E: EntityResolver,
2782{
2783 /// Create new deserializer that will borrow data from the specified string
2784 /// and use specified entity resolver.
2785 pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self {
2786 let mut reader = Reader::from_str(source);
2787 let config = reader.config_mut();
2788 config.expand_empty_elements = true;
2789
2790 Self::new(
2791 SliceReader {
2792 reader,
2793 start_trimmer: StartTrimmer::default(),
2794 },
2795 entity_resolver,
2796 )
2797 }
2798}
2799
2800impl<'de, R> Deserializer<'de, IoReader<R>>
2801where
2802 R: BufRead,
2803{
2804 /// Create new deserializer that will copy data from the specified reader
2805 /// into internal buffer.
2806 ///
2807 /// If you already have a string use [`Self::from_str`] instead, because it
2808 /// will borrow instead of copy. If you have `&[u8]` which is known to represent
2809 /// UTF-8, you can decode it first before using [`from_str`].
2810 ///
2811 /// Deserializer created with this method will not resolve custom entities.
2812 pub fn from_reader(reader: R) -> Self {
2813 Self::with_resolver(reader, PredefinedEntityResolver)
2814 }
2815}
2816
2817impl<'de, R, E> Deserializer<'de, IoReader<R>, E>
2818where
2819 R: BufRead,
2820 E: EntityResolver,
2821{
2822 /// Create new deserializer that will copy data from the specified reader
2823 /// into internal buffer and use specified entity resolver.
2824 ///
2825 /// If you already have a string use [`Self::from_str`] instead, because it
2826 /// will borrow instead of copy. If you have `&[u8]` which is known to represent
2827 /// UTF-8, you can decode it first before using [`from_str`].
2828 pub fn with_resolver(reader: R, entity_resolver: E) -> Self {
2829 let mut reader = Reader::from_reader(reader);
2830 let config = reader.config_mut();
2831 config.expand_empty_elements = true;
2832
2833 Self::new(
2834 IoReader {
2835 reader,
2836 start_trimmer: StartTrimmer::default(),
2837 buf: Vec::new(),
2838 },
2839 entity_resolver,
2840 )
2841 }
2842}
2843
2844impl<'de, 'a, R, E> de::Deserializer<'de> for &'a mut Deserializer<'de, R, E>
2845where
2846 R: XmlRead<'de>,
2847 E: EntityResolver,
2848{
2849 type Error = DeError;
2850
2851 deserialize_primitives!();
2852
2853 fn deserialize_struct<V>(
2854 self,
2855 _name: &'static str,
2856 fields: &'static [&'static str],
2857 visitor: V,
2858 ) -> Result<V::Value, DeError>
2859 where
2860 V: Visitor<'de>,
2861 {
2862 match self.next()? {
2863 DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self, e, fields)?),
2864 // SAFETY: The reader is guaranteed that we don't have unmatched tags
2865 // If we here, then out deserializer has a bug
2866 DeEvent::End(e) => unreachable!("{:?}", e),
2867 // Deserializer methods are only hints, if deserializer could not satisfy
2868 // request, it should return the data that it has. It is responsibility
2869 // of a Visitor to return an error if it does not understand the data
2870 DeEvent::Text(e) => match e.text {
2871 Cow::Borrowed(s) => visitor.visit_borrowed_str(s),
2872 Cow::Owned(s) => visitor.visit_string(s),
2873 },
2874 DeEvent::Eof => Err(DeError::UnexpectedEof),
2875 }
2876 }
2877
2878 /// Unit represented in XML as a `xs:element` or text/CDATA content.
2879 /// Any content inside `xs:element` is ignored and skipped.
2880 ///
2881 /// Produces unit struct from any of following inputs:
2882 /// - any `<tag ...>...</tag>`
2883 /// - any `<tag .../>`
2884 /// - any consequent text / CDATA content (can consist of several parts
2885 /// delimited by comments and processing instructions)
2886 ///
2887 /// # Events handling
2888 ///
2889 /// |Event |XML |Handling
2890 /// |------------------|---------------------------|-------------------------------------------
2891 /// |[`DeEvent::Start`]|`<tag>...</tag>` |Calls `visitor.visit_unit()`, consumes all events up to and including corresponding `End` event
2892 /// |[`DeEvent::End`] |`</tag>` |This is impossible situation, the method will panic if it happens
2893 /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Calls `visitor.visit_unit()`. The content is ignored
2894 /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
2895 fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, DeError>
2896 where
2897 V: Visitor<'de>,
2898 {
2899 match self.next()? {
2900 DeEvent::Start(s) => {
2901 self.read_to_end(s.name())?;
2902 visitor.visit_unit()
2903 }
2904 DeEvent::Text(_) => visitor.visit_unit(),
2905 // SAFETY: The reader is guaranteed that we don't have unmatched tags
2906 // If we here, then out deserializer has a bug
2907 DeEvent::End(e) => unreachable!("{:?}", e),
2908 DeEvent::Eof => Err(DeError::UnexpectedEof),
2909 }
2910 }
2911
2912 /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
2913 /// with the same deserializer.
2914 fn deserialize_newtype_struct<V>(
2915 self,
2916 _name: &'static str,
2917 visitor: V,
2918 ) -> Result<V::Value, DeError>
2919 where
2920 V: Visitor<'de>,
2921 {
2922 visitor.visit_newtype_struct(self)
2923 }
2924
2925 fn deserialize_enum<V>(
2926 self,
2927 _name: &'static str,
2928 _variants: &'static [&'static str],
2929 visitor: V,
2930 ) -> Result<V::Value, DeError>
2931 where
2932 V: Visitor<'de>,
2933 {
2934 visitor.visit_enum(var::EnumAccess::new(self))
2935 }
2936
2937 fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, DeError>
2938 where
2939 V: Visitor<'de>,
2940 {
2941 visitor.visit_seq(self)
2942 }
2943
2944 fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, DeError>
2945 where
2946 V: Visitor<'de>,
2947 {
2948 match self.peek()? {
2949 DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
2950 DeEvent::Eof => visitor.visit_none(),
2951 _ => visitor.visit_some(self),
2952 }
2953 }
2954
2955 fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, DeError>
2956 where
2957 V: Visitor<'de>,
2958 {
2959 match self.peek()? {
2960 DeEvent::Text(_) => self.deserialize_str(visitor),
2961 _ => self.deserialize_map(visitor),
2962 }
2963 }
2964}
2965
2966/// An accessor to sequence elements forming a value for top-level sequence of XML
2967/// elements.
2968///
2969/// Technically, multiple top-level elements violates XML rule of only one top-level
2970/// element, but we consider this as several concatenated XML documents.
2971impl<'de, 'a, R, E> SeqAccess<'de> for &'a mut Deserializer<'de, R, E>
2972where
2973 R: XmlRead<'de>,
2974 E: EntityResolver,
2975{
2976 type Error = DeError;
2977
2978 fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error>
2979 where
2980 T: DeserializeSeed<'de>,
2981 {
2982 match self.peek()? {
2983 DeEvent::Eof => {
2984 // We need to consume event in order to self.is_empty() worked
2985 self.next()?;
2986 Ok(None)
2987 }
2988
2989 // Start(tag), End(tag), Text
2990 _ => seed.deserialize(&mut **self).map(Some),
2991 }
2992 }
2993}
2994
2995impl<'de, 'a, R, E> IntoDeserializer<'de, DeError> for &'a mut Deserializer<'de, R, E>
2996where
2997 R: XmlRead<'de>,
2998 E: EntityResolver,
2999{
3000 type Deserializer = Self;
3001
3002 #[inline]
3003 fn into_deserializer(self) -> Self {
3004 self
3005 }
3006}
3007
3008////////////////////////////////////////////////////////////////////////////////////////////////////
3009
/// State of the algorithm that turns raw reader events into semi-trimmed
/// events. It is kept separately so that it does not depend on the way in
/// which events are read.
struct StartTrimmer {
    /// When `true`, leading whitespace is stripped from the next returned
    /// [`Event::Text`]. The flag is raised after every returned event except
    /// [`Event::Text`] and [`Event::CData`], so an [`Event::Text`] that
    /// directly follows one of those two is left untrimmed.
    trim_start: bool,
}
3020
3021impl StartTrimmer {
3022 /// Converts raw reader's event into a payload event.
3023 /// Returns `None`, if event should be skipped.
3024 #[inline(always)]
3025 fn trim<'a>(&mut self, event: Event<'a>) -> Option<PayloadEvent<'a>> {
3026 let (event, trim_next_event) = match event {
3027 Event::DocType(e) => (PayloadEvent::DocType(e), true),
3028 Event::Start(e) => (PayloadEvent::Start(e), true),
3029 Event::End(e) => (PayloadEvent::End(e), true),
3030 Event::Eof => (PayloadEvent::Eof, true),
3031
3032 // Do not trim next text event after Text or CDATA event
3033 Event::CData(e) => (PayloadEvent::CData(e), false),
3034 Event::Text(mut e) => {
3035 // If event is empty after trimming, skip it
3036 if self.trim_start && e.inplace_trim_start() {
3037 return None;
3038 }
3039 (PayloadEvent::Text(e), false)
3040 }
3041
3042 _ => return None,
3043 };
3044 self.trim_start = trim_next_event;
3045 Some(event)
3046 }
3047}
3048
3049impl Default for StartTrimmer {
3050 #[inline]
3051 fn default() -> Self {
3052 Self { trim_start: true }
3053 }
3054}
3055
3056////////////////////////////////////////////////////////////////////////////////////////////////////
3057
3058/// Trait used by the deserializer for iterating over input. This is manually
3059/// "specialized" for iterating over `&[u8]`.
3060///
3061/// You do not need to implement this trait, it is needed to abstract from
3062/// [borrowing](SliceReader) and [copying](IoReader) data sources and reuse code in
3063/// deserializer
3064pub trait XmlRead<'i> {
3065 /// Return an input-borrowing event.
3066 fn next(&mut self) -> Result<PayloadEvent<'i>, DeError>;
3067
3068 /// Skips until end element is found. Unlike `next()` it will not allocate
3069 /// when it cannot satisfy the lifetime.
3070 fn read_to_end(&mut self, name: QName) -> Result<(), DeError>;
3071
3072 /// A copy of the reader's decoder used to decode strings.
3073 fn decoder(&self) -> Decoder;
3074}
3075
3076/// XML input source that reads from a std::io input stream.
3077///
3078/// You cannot create it, it is created automatically when you call
3079/// [`Deserializer::from_reader`]
3080pub struct IoReader<R: BufRead> {
3081 reader: Reader<R>,
3082 start_trimmer: StartTrimmer,
3083 buf: Vec<u8>,
3084}
3085
3086impl<R: BufRead> IoReader<R> {
3087 /// Returns the underlying XML reader.
3088 ///
3089 /// ```
3090 /// # use pretty_assertions::assert_eq;
3091 /// use serde::Deserialize;
3092 /// use std::io::Cursor;
3093 /// use quick_xml::de::Deserializer;
3094 /// use quick_xml::Reader;
3095 ///
3096 /// #[derive(Deserialize)]
3097 /// struct SomeStruct {
3098 /// field1: String,
3099 /// field2: String,
3100 /// }
3101 ///
3102 /// // Try to deserialize from broken XML
3103 /// let mut de = Deserializer::from_reader(Cursor::new(
3104 /// "<SomeStruct><field1><field2></SomeStruct>"
3105 /// // 0 ^= 28 ^= 41
3106 /// ));
3107 ///
3108 /// let err = SomeStruct::deserialize(&mut de);
3109 /// assert!(err.is_err());
3110 ///
3111 /// let reader: &Reader<Cursor<&str>> = de.get_ref().get_ref();
3112 ///
3113 /// assert_eq!(reader.error_position(), 28);
3114 /// assert_eq!(reader.buffer_position(), 41);
3115 /// ```
3116 pub const fn get_ref(&self) -> &Reader<R> {
3117 &self.reader
3118 }
3119}
3120
3121impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
3122 fn next(&mut self) -> Result<PayloadEvent<'static>, DeError> {
3123 loop {
3124 self.buf.clear();
3125
3126 let event = self.reader.read_event_into(&mut self.buf)?;
3127 if let Some(event) = self.start_trimmer.trim(event) {
3128 return Ok(event.into_owned());
3129 }
3130 }
3131 }
3132
3133 fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
3134 match self.reader.read_to_end_into(name, &mut self.buf) {
3135 Err(e) => Err(e.into()),
3136 Ok(_) => Ok(()),
3137 }
3138 }
3139
3140 fn decoder(&self) -> Decoder {
3141 self.reader.decoder()
3142 }
3143}
3144
3145/// XML input source that reads from a slice of bytes and can borrow from it.
3146///
3147/// You cannot create it, it is created automatically when you call
3148/// [`Deserializer::from_str`].
3149pub struct SliceReader<'de> {
3150 reader: Reader<&'de [u8]>,
3151 start_trimmer: StartTrimmer,
3152}
3153
3154impl<'de> SliceReader<'de> {
3155 /// Returns the underlying XML reader.
3156 ///
3157 /// ```
3158 /// # use pretty_assertions::assert_eq;
3159 /// use serde::Deserialize;
3160 /// use quick_xml::de::Deserializer;
3161 /// use quick_xml::Reader;
3162 ///
3163 /// #[derive(Deserialize)]
3164 /// struct SomeStruct {
3165 /// field1: String,
3166 /// field2: String,
3167 /// }
3168 ///
3169 /// // Try to deserialize from broken XML
3170 /// let mut de = Deserializer::from_str(
3171 /// "<SomeStruct><field1><field2></SomeStruct>"
3172 /// // 0 ^= 28 ^= 41
3173 /// );
3174 ///
3175 /// let err = SomeStruct::deserialize(&mut de);
3176 /// assert!(err.is_err());
3177 ///
3178 /// let reader: &Reader<&[u8]> = de.get_ref().get_ref();
3179 ///
3180 /// assert_eq!(reader.error_position(), 28);
3181 /// assert_eq!(reader.buffer_position(), 41);
3182 /// ```
3183 pub const fn get_ref(&self) -> &Reader<&'de [u8]> {
3184 &self.reader
3185 }
3186}
3187
3188impl<'de> XmlRead<'de> for SliceReader<'de> {
3189 fn next(&mut self) -> Result<PayloadEvent<'de>, DeError> {
3190 loop {
3191 let event = self.reader.read_event()?;
3192 if let Some(event) = self.start_trimmer.trim(event) {
3193 return Ok(event);
3194 }
3195 }
3196 }
3197
3198 fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
3199 match self.reader.read_to_end(name) {
3200 Err(e) => Err(e.into()),
3201 Ok(_) => Ok(()),
3202 }
3203 }
3204
3205 fn decoder(&self) -> Decoder {
3206 self.reader.decoder()
3207 }
3208}
3209
3210#[cfg(test)]
3211mod tests {
3212 use super::*;
3213 use crate::errors::IllFormedError;
3214 use pretty_assertions::assert_eq;
3215
3216 fn make_de<'de>(source: &'de str) -> Deserializer<'de, SliceReader<'de>> {
3217 dbg!(source);
3218 Deserializer::from_str(source)
3219 }
3220
3221 #[cfg(feature = "overlapped-lists")]
3222 mod skip {
3223 use super::*;
3224 use crate::de::DeEvent::*;
3225 use crate::events::BytesEnd;
3226 use pretty_assertions::assert_eq;
3227
3228 /// Checks that `peek()` and `read()` behaves correctly after `skip()`
3229 #[test]
3230 fn read_and_peek() {
3231 let mut de = make_de(
3232 r#"
3233 <root>
3234 <inner>
3235 text
3236 <inner/>
3237 </inner>
3238 <next/>
3239 <target/>
3240 </root>
3241 "#,
3242 );
3243
3244 // Initial conditions - both are empty
3245 assert_eq!(de.read, vec![]);
3246 assert_eq!(de.write, vec![]);
3247
3248 assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
3249 assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("inner")));
3250
3251 // Mark that start_replay() should begin replay from this point
3252 let checkpoint = de.skip_checkpoint();
3253 assert_eq!(checkpoint, 0);
3254
3255 // Should skip first <inner> tree
3256 de.skip().unwrap();
3257 assert_eq!(de.read, vec![]);
3258 assert_eq!(
3259 de.write,
3260 vec![
3261 Start(BytesStart::new("inner")),
3262 Text("text".into()),
3263 Start(BytesStart::new("inner")),
3264 End(BytesEnd::new("inner")),
3265 End(BytesEnd::new("inner")),
3266 ]
3267 );
3268
3269 // Consume <next/>. Now unconsumed XML looks like:
3270 //
3271 // <inner>
3272 // text
3273 // <inner/>
3274 // </inner>
3275 // <target/>
3276 // </root>
3277 assert_eq!(de.next().unwrap(), Start(BytesStart::new("next")));
3278 assert_eq!(de.next().unwrap(), End(BytesEnd::new("next")));
3279
3280 // We finish writing. Next call to `next()` should start replay that messages:
3281 //
3282 // <inner>
3283 // text
3284 // <inner/>
3285 // </inner>
3286 //
3287 // and after that stream that messages:
3288 //
3289 // <target/>
3290 // </root>
3291 de.start_replay(checkpoint);
3292 assert_eq!(
3293 de.read,
3294 vec![
3295 Start(BytesStart::new("inner")),
3296 Text("text".into()),
3297 Start(BytesStart::new("inner")),
3298 End(BytesEnd::new("inner")),
3299 End(BytesEnd::new("inner")),
3300 ]
3301 );
3302 assert_eq!(de.write, vec![]);
3303 assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
3304
3305 // Mark that start_replay() should begin replay from this point
3306 let checkpoint = de.skip_checkpoint();
3307 assert_eq!(checkpoint, 0);
3308
3309 // Skip `$text` node and consume <inner/> after it
3310 de.skip().unwrap();
3311 assert_eq!(
3312 de.read,
3313 vec![
3314 Start(BytesStart::new("inner")),
3315 End(BytesEnd::new("inner")),
3316 End(BytesEnd::new("inner")),
3317 ]
3318 );
3319 assert_eq!(
3320 de.write,
3321 vec![
3322 // This comment here to keep the same formatting of both arrays
3323 // otherwise rustfmt suggest one-line it
3324 Text("text".into()),
3325 ]
3326 );
3327
3328 assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
3329 assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
3330
3331 // We finish writing. Next call to `next()` should start replay messages:
3332 //
3333 // text
3334 // </inner>
3335 //
3336 // and after that stream that messages:
3337 //
3338 // <target/>
3339 // </root>
3340 de.start_replay(checkpoint);
3341 assert_eq!(
3342 de.read,
3343 vec![
3344 // This comment here to keep the same formatting as others
3345 // otherwise rustfmt suggest one-line it
3346 Text("text".into()),
3347 End(BytesEnd::new("inner")),
3348 ]
3349 );
3350 assert_eq!(de.write, vec![]);
3351 assert_eq!(de.next().unwrap(), Text("text".into()));
3352 assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
3353 assert_eq!(de.next().unwrap(), Start(BytesStart::new("target")));
3354 assert_eq!(de.next().unwrap(), End(BytesEnd::new("target")));
3355 assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
3356 assert_eq!(de.next().unwrap(), Eof);
3357 }
3358
3359 /// Checks that `read_to_end()` behaves correctly after `skip()`
3360 #[test]
3361 fn read_to_end() {
3362 let mut de = make_de(
3363 r#"
3364 <root>
3365 <skip>
3366 text
3367 <skip/>
3368 </skip>
3369 <target>
3370 <target/>
3371 </target>
3372 </root>
3373 "#,
3374 );
3375
3376 // Initial conditions - both are empty
3377 assert_eq!(de.read, vec![]);
3378 assert_eq!(de.write, vec![]);
3379
3380 assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
3381
3382 // Mark that start_replay() should begin replay from this point
3383 let checkpoint = de.skip_checkpoint();
3384 assert_eq!(checkpoint, 0);
3385
3386 // Skip the <skip> tree
3387 de.skip().unwrap();
3388 assert_eq!(de.read, vec![]);
3389 assert_eq!(
3390 de.write,
3391 vec![
3392 Start(BytesStart::new("skip")),
3393 Text("text".into()),
3394 Start(BytesStart::new("skip")),
3395 End(BytesEnd::new("skip")),
3396 End(BytesEnd::new("skip")),
3397 ]
3398 );
3399
3400 // Drop all events that represents <target> tree. Now unconsumed XML looks like:
3401 //
3402 // <skip>
3403 // text
3404 // <skip/>
3405 // </skip>
3406 // </root>
3407 assert_eq!(de.next().unwrap(), Start(BytesStart::new("target")));
3408 de.read_to_end(QName(b"target")).unwrap();
3409 assert_eq!(de.read, vec![]);
3410 assert_eq!(
3411 de.write,
3412 vec![
3413 Start(BytesStart::new("skip")),
3414 Text("text".into()),
3415 Start(BytesStart::new("skip")),
3416 End(BytesEnd::new("skip")),
3417 End(BytesEnd::new("skip")),
3418 ]
3419 );
3420
3421 // We finish writing. Next call to `next()` should start replay that messages:
3422 //
3423 // <skip>
3424 // text
3425 // <skip/>
3426 // </skip>
3427 //
3428 // and after that stream that messages:
3429 //
3430 // </root>
3431 de.start_replay(checkpoint);
3432 assert_eq!(
3433 de.read,
3434 vec![
3435 Start(BytesStart::new("skip")),
3436 Text("text".into()),
3437 Start(BytesStart::new("skip")),
3438 End(BytesEnd::new("skip")),
3439 End(BytesEnd::new("skip")),
3440 ]
3441 );
3442 assert_eq!(de.write, vec![]);
3443
3444 assert_eq!(de.next().unwrap(), Start(BytesStart::new("skip")));
3445 de.read_to_end(QName(b"skip")).unwrap();
3446
3447 assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
3448 assert_eq!(de.next().unwrap(), Eof);
3449 }
3450
3451 /// Checks that replay replayes only part of events
3452 /// Test for https://github.com/tafia/quick-xml/issues/435
3453 #[test]
3454 fn partial_replay() {
3455 let mut de = make_de(
3456 r#"
3457 <root>
3458 <skipped-1/>
3459 <skipped-2/>
3460 <inner>
3461 <skipped-3/>
3462 <skipped-4/>
3463 <target-2/>
3464 </inner>
3465 <target-1/>
3466 </root>
3467 "#,
3468 );
3469
3470 // Initial conditions - both are empty
3471 assert_eq!(de.read, vec![]);
3472 assert_eq!(de.write, vec![]);
3473
3474 assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
3475
3476 // start_replay() should start replay from this point
3477 let checkpoint1 = de.skip_checkpoint();
3478 assert_eq!(checkpoint1, 0);
3479
3480 // Should skip first and second <skipped-N/> elements
3481 de.skip().unwrap(); // skipped-1
3482 de.skip().unwrap(); // skipped-2
3483 assert_eq!(de.read, vec![]);
3484 assert_eq!(
3485 de.write,
3486 vec![
3487 Start(BytesStart::new("skipped-1")),
3488 End(BytesEnd::new("skipped-1")),
3489 Start(BytesStart::new("skipped-2")),
3490 End(BytesEnd::new("skipped-2")),
3491 ]
3492 );
3493
3494 ////////////////////////////////////////////////////////////////////////////////////////
3495
3496 assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
3497 assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("skipped-3")));
3498 assert_eq!(
3499 de.read,
3500 vec![
3501 // This comment here to keep the same formatting of both arrays
3502 // otherwise rustfmt suggest one-line it
3503 Start(BytesStart::new("skipped-3")),
3504 ]
3505 );
3506 assert_eq!(
3507 de.write,
3508 vec![
3509 Start(BytesStart::new("skipped-1")),
3510 End(BytesEnd::new("skipped-1")),
3511 Start(BytesStart::new("skipped-2")),
3512 End(BytesEnd::new("skipped-2")),
3513 ]
3514 );
3515
3516 // start_replay() should start replay from this point
3517 let checkpoint2 = de.skip_checkpoint();
3518 assert_eq!(checkpoint2, 4);
3519
3520 // Should skip third and forth <skipped-N/> elements
3521 de.skip().unwrap(); // skipped-3
3522 de.skip().unwrap(); // skipped-4
3523 assert_eq!(de.read, vec![]);
3524 assert_eq!(
3525 de.write,
3526 vec![
3527 // checkpoint 1
3528 Start(BytesStart::new("skipped-1")),
3529 End(BytesEnd::new("skipped-1")),
3530 Start(BytesStart::new("skipped-2")),
3531 End(BytesEnd::new("skipped-2")),
3532 // checkpoint 2
3533 Start(BytesStart::new("skipped-3")),
3534 End(BytesEnd::new("skipped-3")),
3535 Start(BytesStart::new("skipped-4")),
3536 End(BytesEnd::new("skipped-4")),
3537 ]
3538 );
3539 assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-2")));
3540 assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-2")));
3541 assert_eq!(de.peek().unwrap(), &End(BytesEnd::new("inner")));
3542 assert_eq!(
3543 de.read,
3544 vec![
3545 // This comment here to keep the same formatting of both arrays
3546 // otherwise rustfmt suggest one-line it
3547 End(BytesEnd::new("inner")),
3548 ]
3549 );
3550 assert_eq!(
3551 de.write,
3552 vec![
3553 // checkpoint 1
3554 Start(BytesStart::new("skipped-1")),
3555 End(BytesEnd::new("skipped-1")),
3556 Start(BytesStart::new("skipped-2")),
3557 End(BytesEnd::new("skipped-2")),
3558 // checkpoint 2
3559 Start(BytesStart::new("skipped-3")),
3560 End(BytesEnd::new("skipped-3")),
3561 Start(BytesStart::new("skipped-4")),
3562 End(BytesEnd::new("skipped-4")),
3563 ]
3564 );
3565
3566 // Start replay events from checkpoint 2
3567 de.start_replay(checkpoint2);
3568 assert_eq!(
3569 de.read,
3570 vec![
3571 Start(BytesStart::new("skipped-3")),
3572 End(BytesEnd::new("skipped-3")),
3573 Start(BytesStart::new("skipped-4")),
3574 End(BytesEnd::new("skipped-4")),
3575 End(BytesEnd::new("inner")),
3576 ]
3577 );
3578 assert_eq!(
3579 de.write,
3580 vec![
3581 Start(BytesStart::new("skipped-1")),
3582 End(BytesEnd::new("skipped-1")),
3583 Start(BytesStart::new("skipped-2")),
3584 End(BytesEnd::new("skipped-2")),
3585 ]
3586 );
3587
3588 // Replayed events
3589 assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-3")));
3590 assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-3")));
3591 assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-4")));
3592 assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-4")));
3593
3594 assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
3595 assert_eq!(de.read, vec![]);
3596 assert_eq!(
3597 de.write,
3598 vec![
3599 Start(BytesStart::new("skipped-1")),
3600 End(BytesEnd::new("skipped-1")),
3601 Start(BytesStart::new("skipped-2")),
3602 End(BytesEnd::new("skipped-2")),
3603 ]
3604 );
3605
3606 ////////////////////////////////////////////////////////////////////////////////////////
3607
3608 // New events
3609 assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-1")));
3610 assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-1")));
3611
3612 assert_eq!(de.read, vec![]);
3613 assert_eq!(
3614 de.write,
3615 vec![
3616 Start(BytesStart::new("skipped-1")),
3617 End(BytesEnd::new("skipped-1")),
3618 Start(BytesStart::new("skipped-2")),
3619 End(BytesEnd::new("skipped-2")),
3620 ]
3621 );
3622
3623 // Start replay events from checkpoint 1
3624 de.start_replay(checkpoint1);
3625 assert_eq!(
3626 de.read,
3627 vec![
3628 Start(BytesStart::new("skipped-1")),
3629 End(BytesEnd::new("skipped-1")),
3630 Start(BytesStart::new("skipped-2")),
3631 End(BytesEnd::new("skipped-2")),
3632 ]
3633 );
3634 assert_eq!(de.write, vec![]);
3635
3636 // Replayed events
3637 assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-1")));
3638 assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-1")));
3639 assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-2")));
3640 assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-2")));
3641
3642 assert_eq!(de.read, vec![]);
3643 assert_eq!(de.write, vec![]);
3644
3645 // New events
3646 assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
3647 assert_eq!(de.next().unwrap(), Eof);
3648 }
3649
3650 /// Checks that limiting buffer size works correctly
3651 #[test]
3652 fn limit() {
3653 use serde::Deserialize;
3654
3655 #[derive(Debug, Deserialize)]
3656 #[allow(unused)]
3657 struct List {
3658 item: Vec<()>,
3659 }
3660
3661 let mut de = make_de(
3662 r#"
3663 <any-name>
3664 <item/>
3665 <another-item>
3666 <some-element>with text</some-element>
3667 <yet-another-element/>
3668 </another-item>
3669 <item/>
3670 <item/>
3671 </any-name>
3672 "#,
3673 );
3674 de.event_buffer_size(NonZeroUsize::new(3));
3675
3676 match List::deserialize(&mut de) {
3677 Err(DeError::TooManyEvents(count)) => assert_eq!(count.get(), 3),
3678 e => panic!("Expected `Err(TooManyEvents(3))`, but got `{:?}`", e),
3679 }
3680 }
3681
3682 /// Without handling Eof in `skip` this test failed with memory allocation
3683 #[test]
3684 fn invalid_xml() {
3685 use crate::de::DeEvent::*;
3686
3687 let mut de = make_de("<root>");
3688
3689 // Cache all events
3690 let checkpoint = de.skip_checkpoint();
3691 de.skip().unwrap();
3692 de.start_replay(checkpoint);
3693 assert_eq!(de.read, vec![Start(BytesStart::new("root")), Eof]);
3694 }
3695 }
3696
3697 mod read_to_end {
3698 use super::*;
3699 use crate::de::DeEvent::*;
3700 use pretty_assertions::assert_eq;
3701
3702 #[test]
3703 fn complex() {
3704 let mut de = make_de(
3705 r#"
3706 <root>
3707 <tag a="1"><tag>text</tag>content</tag>
3708 <tag a="2"><![CDATA[cdata content]]></tag>
3709 <self-closed/>
3710 </root>
3711 "#,
3712 );
3713
3714 assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
3715
3716 assert_eq!(
3717 de.next().unwrap(),
3718 Start(BytesStart::from_content(r#"tag a="1""#, 3))
3719 );
3720 assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ());
3721
3722 assert_eq!(
3723 de.next().unwrap(),
3724 Start(BytesStart::from_content(r#"tag a="2""#, 3))
3725 );
3726 assert_eq!(de.next().unwrap(), Text("cdata content".into()));
3727 assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag")));
3728
3729 assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed")));
3730 assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ());
3731
3732 assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
3733 assert_eq!(de.next().unwrap(), Eof);
3734 }
3735
3736 #[test]
3737 fn invalid_xml1() {
3738 let mut de = make_de("<tag><tag></tag>");
3739
3740 assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag")));
3741 assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("tag")));
3742
3743 match de.read_to_end(QName(b"tag")) {
3744 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
3745 assert_eq!(cause, IllFormedError::MissingEndTag("tag".into()))
3746 }
3747 x => panic!(
3748 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
3749 x
3750 ),
3751 }
3752 assert_eq!(de.next().unwrap(), Eof);
3753 }
3754
3755 #[test]
3756 fn invalid_xml2() {
3757 let mut de = make_de("<tag><![CDATA[]]><tag></tag>");
3758
3759 assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag")));
3760 assert_eq!(de.peek().unwrap(), &Text("".into()));
3761
3762 match de.read_to_end(QName(b"tag")) {
3763 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
3764 assert_eq!(cause, IllFormedError::MissingEndTag("tag".into()))
3765 }
3766 x => panic!(
3767 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
3768 x
3769 ),
3770 }
3771 assert_eq!(de.next().unwrap(), Eof);
3772 }
3773 }
3774
/// Feeds the same document to an `IoReader` and a `SliceReader` and checks
/// that both produce equal events, one by one, until both report `Eof`.
#[test]
fn borrowing_reader_parity() {
    let s = r#"
<item name="hello" source="world.rs">Some text</item>
<item2/>
<item3 value="world" />
"#;

    let mut io_reader = IoReader {
        reader: Reader::from_reader(s.as_bytes()),
        start_trimmer: StartTrimmer::default(),
        buf: Vec::new(),
    };
    let mut slice_reader = SliceReader {
        reader: Reader::from_str(s),
        start_trimmer: StartTrimmer::default(),
    };

    loop {
        let from_io = io_reader.next().unwrap();
        let from_slice = slice_reader.next().unwrap();

        match (&from_io, &from_slice) {
            // Stop only when both readers are exhausted at the same step
            (PayloadEvent::Eof, PayloadEvent::Eof) => break,
            _ => assert_eq!(from_io, from_slice),
        }
    }
}
3804
/// Checks the exact list of events a `SliceReader` produces, up to `Eof`,
/// when `expand_empty_elements` is switched on: every self-closed element is
/// expected as a `Start` followed by an `End`.
#[test]
fn borrowing_reader_events() {
    use crate::de::PayloadEvent::*;

    let s = r#"
<item name="hello" source="world.rs">Some text</item>
<item2></item2>
<item3/>
<item4 value="world" />
"#;

    let mut reader = SliceReader {
        reader: Reader::from_str(s),
        start_trimmer: StartTrimmer::default(),
    };
    reader.reader.config_mut().expand_empty_elements = true;

    // Drain the reader; `Eof` itself is not recorded
    let mut events = Vec::new();
    loop {
        match reader.next().unwrap() {
            PayloadEvent::Eof => break,
            event => events.push(event),
        }
    }

    let expected = vec![
        Start(BytesStart::from_content(
            r#"item name="hello" source="world.rs""#,
            4,
        )),
        Text(BytesText::from_escaped("Some text")),
        End(BytesEnd::new("item")),
        Start(BytesStart::from_content("item2", 5)),
        End(BytesEnd::new("item2")),
        Start(BytesStart::from_content("item3", 5)),
        End(BytesEnd::new("item3")),
        Start(BytesStart::from_content(r#"item4 value="world" "#, 5)),
        End(BytesEnd::new("item4")),
    ];
    assert_eq!(events, expected)
}
3852
/// Ensures that [`Deserializer::read_string()`] can never receive an `End`
/// event on its own, because the parser reports ill-formed input earlier.
///
/// Three inputs are checked:
/// - a lone `</root>` fails with `UnmatchedEndTag("root")`;
/// - `<root></root>` deserializes into an empty string;
/// - `<root></other>` fails with `MismatchedEndTag`.
#[test]
fn read_string() {
    match from_str::<String>(r#"</root>"#) {
        Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
            assert_eq!(cause, IllFormedError::UnmatchedEndTag("root".into()));
        }
        x => panic!(
            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
            x
        ),
    }

    let s: String = from_str(r#"<root></root>"#).unwrap();
    assert_eq!(s, "");

    match from_str::<String>(r#"<root></other>"#) {
        Err(DeError::InvalidXml(Error::IllFormed(cause))) => assert_eq!(
            cause,
            IllFormedError::MismatchedEndTag {
                expected: "root".into(),
                found: "other".into(),
            }
        ),
        // Message kept identical to the first arm (balanced parentheses)
        x => panic!(
            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
            x
        ),
    }
}
3881
3882 /// Tests for https://github.com/tafia/quick-xml/issues/474.
3883 ///
/// These tests ensure that comments and processing instructions are ignored,
/// even when they split one logical string into pieces.
3886 mod merge_text {
3887 use super::*;
3888 use pretty_assertions::assert_eq;
3889
/// A document consisting only of character data yields that data as `Text`.
#[test]
fn text() {
    let mut reader = make_de("text");
    let first = reader.next().unwrap();
    assert_eq!(first, DeEvent::Text("text".into()));
}
3895
/// A lone CDATA section is reported as a `Text` event with its content.
#[test]
fn cdata() {
    let mut reader = make_de("<![CDATA[cdata]]>");
    let first = reader.next().unwrap();
    assert_eq!(first, DeEvent::Text("cdata".into()));
}
3901
/// Text directly followed by CDATA is merged into one `Text` event.
#[test]
fn text_and_cdata() {
    let mut reader = make_de("text and <![CDATA[cdata]]>");
    let merged = reader.next().unwrap();
    assert_eq!(merged, DeEvent::Text("text and cdata".into()));
}
3907
/// Text followed by an empty CDATA section keeps its trailing space.
#[test]
fn text_and_empty_cdata() {
    let mut reader = make_de("text and <![CDATA[]]>");
    let merged = reader.next().unwrap();
    assert_eq!(merged, DeEvent::Text("text and ".into()));
}
3913
/// CDATA directly followed by text is merged into one `Text` event.
#[test]
fn cdata_and_text() {
    let mut reader = make_de("<![CDATA[cdata]]> and text");
    let merged = reader.next().unwrap();
    assert_eq!(merged, DeEvent::Text("cdata and text".into()));
}
3919
/// Text that follows an empty CDATA section keeps its leading space.
#[test]
fn empty_cdata_and_text() {
    let mut reader = make_de("<![CDATA[]]> and text");
    let merged = reader.next().unwrap();
    assert_eq!(merged, DeEvent::Text(" and text".into()));
}
3925
/// Two adjacent CDATA sections (here splitting a literal `]]>`) are merged
/// into one `Text` event.
#[test]
fn cdata_and_cdata() {
    let input = "\
        <![CDATA[cdata]]]]>\
        <![CDATA[>cdata]]>\
    ";
    let mut reader = make_de(input);
    let merged = reader.next().unwrap();
    assert_eq!(merged, DeEvent::Text("cdata]]>cdata".into()));
}
3936
3937 mod comment_between {
3938 use super::*;
3939 use pretty_assertions::assert_eq;
3940
3941 #[test]
3942 fn text() {
3943 let mut de = make_de(
3944 "\
3945 text \
3946 <!--comment 1--><!--comment 2--> \
3947 text\
3948 ",
3949 );
3950 assert_eq!(de.next().unwrap(), DeEvent::Text("text text".into()));
3951 }
3952
3953 #[test]
3954 fn cdata() {
3955 let mut de = make_de(
3956 "\
3957 <![CDATA[cdata]]]]>\
3958 <!--comment 1--><!--comment 2-->\
3959 <![CDATA[>cdata]]>\
3960 ",
3961 );
3962 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
3963 }
3964
3965 #[test]
3966 fn text_and_cdata() {
3967 let mut de = make_de(
3968 "\
3969 text \
3970 <!--comment 1--><!--comment 2-->\
3971 <![CDATA[ cdata]]>\
3972 ",
3973 );
3974 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata".into()));
3975 }
3976
3977 #[test]
3978 fn text_and_empty_cdata() {
3979 let mut de = make_de(
3980 "\
3981 text \
3982 <!--comment 1--><!--comment 2-->\
3983 <![CDATA[]]>\
3984 ",
3985 );
3986 assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into()));
3987 }
3988
3989 #[test]
3990 fn cdata_and_text() {
3991 let mut de = make_de(
3992 "\
3993 <![CDATA[cdata ]]>\
3994 <!--comment 1--><!--comment 2--> \
3995 text \
3996 ",
3997 );
3998 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata text".into()));
3999 }
4000
4001 #[test]
4002 fn empty_cdata_and_text() {
4003 let mut de = make_de(
4004 "\
4005 <![CDATA[]]>\
4006 <!--comment 1--><!--comment 2--> \
4007 text \
4008 ",
4009 );
4010 assert_eq!(de.next().unwrap(), DeEvent::Text(" text".into()));
4011 }
4012
4013 #[test]
4014 fn cdata_and_cdata() {
4015 let mut de = make_de(
4016 "\
4017 <![CDATA[cdata]]]>\
4018 <!--comment 1--><!--comment 2-->\
4019 <![CDATA[]>cdata]]>\
4020 ",
4021 );
4022 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4023 }
4024 }
4025
4026 mod pi_between {
4027 use super::*;
4028 use pretty_assertions::assert_eq;
4029
4030 #[test]
4031 fn text() {
4032 let mut de = make_de(
4033 "\
4034 text \
4035 <?pi 1?><?pi 2?> \
4036 text\
4037 ",
4038 );
4039 assert_eq!(de.next().unwrap(), DeEvent::Text("text text".into()));
4040 }
4041
4042 #[test]
4043 fn cdata() {
4044 let mut de = make_de(
4045 "\
4046 <![CDATA[cdata]]]]>\
4047 <?pi 1?><?pi 2?>\
4048 <![CDATA[>cdata]]>\
4049 ",
4050 );
4051 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4052 }
4053
4054 #[test]
4055 fn text_and_cdata() {
4056 let mut de = make_de(
4057 "\
4058 text \
4059 <?pi 1?><?pi 2?>\
4060 <![CDATA[ cdata]]>\
4061 ",
4062 );
4063 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata".into()));
4064 }
4065
4066 #[test]
4067 fn text_and_empty_cdata() {
4068 let mut de = make_de(
4069 "\
4070 text \
4071 <?pi 1?><?pi 2?>\
4072 <![CDATA[]]>\
4073 ",
4074 );
4075 assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into()));
4076 }
4077
4078 #[test]
4079 fn cdata_and_text() {
4080 let mut de = make_de(
4081 "\
4082 <![CDATA[cdata ]]>\
4083 <?pi 1?><?pi 2?> \
4084 text \
4085 ",
4086 );
4087 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata text".into()));
4088 }
4089
4090 #[test]
4091 fn empty_cdata_and_text() {
4092 let mut de = make_de(
4093 "\
4094 <![CDATA[]]>\
4095 <?pi 1?><?pi 2?> \
4096 text \
4097 ",
4098 );
4099 assert_eq!(de.next().unwrap(), DeEvent::Text(" text".into()));
4100 }
4101
4102 #[test]
4103 fn cdata_and_cdata() {
4104 let mut de = make_de(
4105 "\
4106 <![CDATA[cdata]]]>\
4107 <?pi 1?><?pi 2?>\
4108 <![CDATA[]>cdata]]>\
4109 ",
4110 );
4111 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4112 }
4113 }
4114 }
4115
4116 /// Tests for https://github.com/tafia/quick-xml/issues/474.
4117 ///
/// These tests ensure that any combination of payload data is processed
/// as expected.
4120 mod triples {
4121 use super::*;
4122 use pretty_assertions::assert_eq;
4123
4124 mod start {
4125 use super::*;
4126
4127 /// <tag1><tag2>...
4128 mod start {
4129 use super::*;
4130 use pretty_assertions::assert_eq;
4131
4132 #[test]
4133 fn start() {
4134 let mut de = make_de("<tag1><tag2><tag3>");
4135 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4136 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4137 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag3")));
4138 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4139 }
4140
4141 /// Not matching end tag will result to error
4142 #[test]
4143 fn end() {
4144 let mut de = make_de("<tag1><tag2></tag2>");
4145 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4146 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4147 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag2")));
4148 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4149 }
4150
4151 #[test]
4152 fn text() {
4153 let mut de = make_de("<tag1><tag2> text ");
4154 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4155 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4156 // Text is trimmed from both sides
4157 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4158 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4159 }
4160
4161 #[test]
4162 fn cdata() {
4163 let mut de = make_de("<tag1><tag2><![CDATA[ cdata ]]>");
4164 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4165 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4166 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4167 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4168 }
4169
4170 #[test]
4171 fn eof() {
4172 let mut de = make_de("<tag1><tag2>");
4173 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4174 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4175 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4176 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4177 }
4178 }
4179
4180 /// <tag></tag>...
4181 mod end {
4182 use super::*;
4183 use pretty_assertions::assert_eq;
4184
4185 #[test]
4186 fn start() {
4187 let mut de = make_de("<tag></tag><tag2>");
4188 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4189 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4190 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4191 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4192 }
4193
4194 #[test]
4195 fn end() {
4196 let mut de = make_de("<tag></tag></tag2>");
4197 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4198 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4199 match de.next() {
4200 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4201 assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag2".into()));
4202 }
4203 x => panic!(
4204 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4205 x
4206 ),
4207 }
4208 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4209 }
4210
4211 #[test]
4212 fn text() {
4213 let mut de = make_de("<tag></tag> text ");
4214 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4215 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4216 // Text is trimmed from both sides
4217 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4218 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4219 }
4220
4221 #[test]
4222 fn cdata() {
4223 let mut de = make_de("<tag></tag><![CDATA[ cdata ]]>");
4224 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4225 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4226 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4227 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4228 }
4229
4230 #[test]
4231 fn eof() {
4232 let mut de = make_de("<tag></tag>");
4233 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4234 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4235 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4236 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4237 }
4238 }
4239
4240 /// <tag> text ...
4241 mod text {
4242 use super::*;
4243 use pretty_assertions::assert_eq;
4244
4245 #[test]
4246 fn start() {
4247 let mut de = make_de("<tag> text <tag2>");
4248 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4249 // Text is trimmed from both sides
4250 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4251 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4252 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4253 }
4254
4255 #[test]
4256 fn end() {
4257 let mut de = make_de("<tag> text </tag>");
4258 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4259 // Text is trimmed from both sides
4260 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4261 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4262 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4263 }
4264
4265 // start::text::text has no difference from start::text
4266
4267 #[test]
4268 fn cdata() {
4269 let mut de = make_de("<tag> text <![CDATA[ cdata ]]>");
4270 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4271 // Text is trimmed from the start
4272 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into()));
4273 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4274 }
4275
4276 #[test]
4277 fn eof() {
4278 let mut de = make_de("<tag> text ");
4279 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4280 // Text is trimmed from both sides
4281 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4282 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4283 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4284 }
4285 }
4286
4287 /// <tag><![CDATA[ cdata ]]>...
4288 mod cdata {
4289 use super::*;
4290 use pretty_assertions::assert_eq;
4291
4292 #[test]
4293 fn start() {
4294 let mut de = make_de("<tag><![CDATA[ cdata ]]><tag2>");
4295 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4296 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4297 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4298 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4299 }
4300
4301 #[test]
4302 fn end() {
4303 let mut de = make_de("<tag><![CDATA[ cdata ]]></tag>");
4304 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4305 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4306 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4307 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4308 }
4309
4310 #[test]
4311 fn text() {
4312 let mut de = make_de("<tag><![CDATA[ cdata ]]> text ");
4313 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4314 // Text is trimmed from the end
4315 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into()));
4316 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4317 }
4318
4319 #[test]
4320 fn cdata() {
4321 let mut de = make_de("<tag><![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4322 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4323 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
4324 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4325 }
4326
4327 #[test]
4328 fn eof() {
4329 let mut de = make_de("<tag><![CDATA[ cdata ]]>");
4330 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4331 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4332 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4333 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4334 }
4335 }
4336 }
4337
/// An input that starts with an end tag is reported as `UnmatchedEndTag`;
/// the call after the error returns `Eof`.
#[test]
fn end() {
    let mut de = make_de("</tag>");

    let first = de.next();
    match first {
        Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
            let expected = IllFormedError::UnmatchedEndTag("tag".into());
            assert_eq!(cause, expected);
        }
        x => panic!(
            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
            x
        ),
    }
    assert_eq!(de.next().unwrap(), DeEvent::Eof);
}
4353
4354 mod text {
4355 use super::*;
4356 use pretty_assertions::assert_eq;
4357
4358 mod start {
4359 use super::*;
4360 use pretty_assertions::assert_eq;
4361
4362 #[test]
4363 fn start() {
4364 let mut de = make_de(" text <tag1><tag2>");
4365 // Text is trimmed from both sides
4366 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4367 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4368 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4369 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4370 }
4371
4372 /// Not matching end tag will result in error
4373 #[test]
4374 fn end() {
4375 let mut de = make_de(" text <tag></tag>");
4376 // Text is trimmed from both sides
4377 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4378 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4379 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4380 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4381 }
4382
4383 #[test]
4384 fn text() {
4385 let mut de = make_de(" text <tag> text2 ");
4386 // Text is trimmed from both sides
4387 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4388 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4389 // Text is trimmed from both sides
4390 assert_eq!(de.next().unwrap(), DeEvent::Text("text2".into()));
4391 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4392 }
4393
4394 #[test]
4395 fn cdata() {
4396 let mut de = make_de(" text <tag><![CDATA[ cdata ]]>");
4397 // Text is trimmed from both sides
4398 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4399 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4400 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4401 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4402 }
4403
4404 #[test]
4405 fn eof() {
4406 // Text is trimmed from both sides
4407 let mut de = make_de(" text <tag>");
4408 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4409 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4410 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4411 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4412 }
4413 }
4414
/// An end tag without a corresponding start tag is reported as
/// `UnmatchedEndTag`, after the preceding text has been returned normally.
#[test]
fn end() {
    let mut de = make_de(" text </tag>");

    // Text is trimmed from both sides
    let leading = de.next().unwrap();
    assert_eq!(leading, DeEvent::Text("text".into()));

    let second = de.next();
    match second {
        Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
            let expected = IllFormedError::UnmatchedEndTag("tag".into());
            assert_eq!(cause, expected);
        }
        x => panic!(
            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
            x
        ),
    }
    assert_eq!(de.next().unwrap(), DeEvent::Eof);
}
4432
4433 // text::text::something is equivalent to text::something
4434
4435 mod cdata {
4436 use super::*;
4437 use pretty_assertions::assert_eq;
4438
4439 #[test]
4440 fn start() {
4441 let mut de = make_de(" text <![CDATA[ cdata ]]><tag>");
4442 // Text is trimmed from the start
4443 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into()));
4444 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4445 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4446 }
4447
4448 #[test]
4449 fn end() {
4450 let mut de = make_de(" text <![CDATA[ cdata ]]></tag>");
4451 // Text is trimmed from the start
4452 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into()));
4453 match de.next() {
4454 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4455 assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4456 }
4457 x => panic!(
4458 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4459 x
4460 ),
4461 }
4462 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4463 }
4464
4465 #[test]
4466 fn text() {
4467 let mut de = make_de(" text <![CDATA[ cdata ]]> text2 ");
4468 // Text is trimmed from the start and from the end
4469 assert_eq!(
4470 de.next().unwrap(),
4471 DeEvent::Text("text cdata text2".into())
4472 );
4473 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4474 }
4475
4476 #[test]
4477 fn cdata() {
4478 let mut de = make_de(" text <![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4479 // Text is trimmed from the start
4480 assert_eq!(
4481 de.next().unwrap(),
4482 DeEvent::Text("text cdata cdata2 ".into())
4483 );
4484 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4485 }
4486
4487 #[test]
4488 fn eof() {
4489 let mut de = make_de(" text <![CDATA[ cdata ]]>");
4490 // Text is trimmed from the start
4491 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into()));
4492 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4493 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4494 }
4495 }
4496 }
4497
4498 mod cdata {
4499 use super::*;
4500 use pretty_assertions::assert_eq;
4501
4502 mod start {
4503 use super::*;
4504 use pretty_assertions::assert_eq;
4505
4506 #[test]
4507 fn start() {
4508 let mut de = make_de("<![CDATA[ cdata ]]><tag1><tag2>");
4509 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4510 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4511 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4512 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4513 }
4514
4515 /// Not matching end tag will result in error
4516 #[test]
4517 fn end() {
4518 let mut de = make_de("<![CDATA[ cdata ]]><tag></tag>");
4519 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4520 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4521 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4522 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4523 }
4524
4525 #[test]
4526 fn text() {
4527 let mut de = make_de("<![CDATA[ cdata ]]><tag> text ");
4528 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4529 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4530 // Text is trimmed from both sides
4531 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4532 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4533 }
4534
4535 #[test]
4536 fn cdata() {
4537 let mut de = make_de("<![CDATA[ cdata ]]><tag><![CDATA[ cdata2 ]]>");
4538 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4539 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4540 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata2 ".into()));
4541 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4542 }
4543
4544 #[test]
4545 fn eof() {
4546 let mut de = make_de("<![CDATA[ cdata ]]><tag>");
4547 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4548 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4549 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4550 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4551 }
4552 }
4553
/// An end tag without a corresponding start tag is reported as
/// `UnmatchedEndTag`, after the preceding CDATA has been returned normally.
#[test]
fn end() {
    let mut de = make_de("<![CDATA[ cdata ]]></tag>");

    let leading = de.next().unwrap();
    assert_eq!(leading, DeEvent::Text(" cdata ".into()));

    let second = de.next();
    match second {
        Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
            let expected = IllFormedError::UnmatchedEndTag("tag".into());
            assert_eq!(cause, expected);
        }
        x => panic!(
            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
            x
        ),
    }
    assert_eq!(de.next().unwrap(), DeEvent::Eof);
}
4570
4571 mod text {
4572 use super::*;
4573 use pretty_assertions::assert_eq;
4574
4575 #[test]
4576 fn start() {
4577 let mut de = make_de("<![CDATA[ cdata ]]> text <tag>");
4578 // Text is trimmed from the end
4579 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into()));
4580 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4581 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4582 }
4583
4584 #[test]
4585 fn end() {
4586 let mut de = make_de("<![CDATA[ cdata ]]> text </tag>");
4587 // Text is trimmed from the end
4588 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into()));
4589 match de.next() {
4590 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4591 assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4592 }
4593 x => panic!(
4594 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4595 x
4596 ),
4597 }
4598 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4599 }
4600
4601 // cdata::text::text is equivalent to cdata::text
4602
4603 #[test]
4604 fn cdata() {
4605 let mut de = make_de("<![CDATA[ cdata ]]> text <![CDATA[ cdata2 ]]>");
4606 assert_eq!(
4607 de.next().unwrap(),
4608 DeEvent::Text(" cdata text cdata2 ".into())
4609 );
4610 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4611 }
4612
4613 #[test]
4614 fn eof() {
4615 let mut de = make_de("<![CDATA[ cdata ]]> text ");
4616 // Text is trimmed from the end
4617 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into()));
4618 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4619 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4620 }
4621 }
4622
4623 mod cdata {
4624 use super::*;
4625 use pretty_assertions::assert_eq;
4626
4627 #[test]
4628 fn start() {
4629 let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><tag>");
4630 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
4631 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4632 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4633 }
4634
4635 #[test]
4636 fn end() {
4637 let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]></tag>");
4638 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
4639 match de.next() {
4640 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4641 assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4642 }
4643 x => panic!(
4644 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4645 x
4646 ),
4647 }
4648 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4649 }
4650
4651 #[test]
4652 fn text() {
4653 let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]> text ");
4654 // Text is trimmed from the end
4655 assert_eq!(
4656 de.next().unwrap(),
4657 DeEvent::Text(" cdata cdata2 text".into())
4658 );
4659 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4660 }
4661
4662 #[test]
4663 fn cdata() {
4664 let mut de =
4665 make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><![CDATA[ cdata3 ]]>");
4666 assert_eq!(
4667 de.next().unwrap(),
4668 DeEvent::Text(" cdata cdata2 cdata3 ".into())
4669 );
4670 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4671 }
4672
4673 #[test]
4674 fn eof() {
4675 let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4676 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
4677 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4678 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4679 }
4680 }
4681 }
4682 }
4683}