quick_xml/events/
mod.rs

1//! Defines zero-copy XML events used throughout this library.
2//!
//! An XML event often represents part of an XML element.
4//! They occur both during reading and writing and are
5//! usually used with the stream-oriented API.
6//!
7//! For example, the XML element
8//! ```xml
9//! <name attr="value">Inner text</name>
10//! ```
11//! consists of the three events `Start`, `Text` and `End`.
12//! They can also represent other parts in an XML document like the
13//! XML declaration. Each Event usually contains further information,
14//! like the tag name, the attribute or the inner text.
15//!
16//! See [`Event`] for a list of all possible events.
17//!
18//! # Reading
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
20//! and [`Reader::read_event_into`]. You must listen
21//! for the different types of events you are interested in.
22//!
23//! See [`Reader`] for further information.
24//!
25//! # Writing
26//! When writing the XML document, you must create the XML element
27//! by constructing the events it consists of and pass them to the writer
28//! sequentially.
29//!
30//! See [`Writer`] for further information.
31//!
32//! [`Reader::read_event`]: crate::reader::Reader::read_event
33//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
34//! [`Reader`]: crate::reader::Reader
35//! [`Writer`]: crate::writer::Writer
36//! [`Event`]: crate::events::Event
37
38pub mod attributes;
39
40#[cfg(feature = "encoding")]
41use encoding_rs::Encoding;
42use std::borrow::Cow;
43use std::fmt::{self, Debug, Formatter};
44use std::iter::FusedIterator;
45use std::mem::replace;
46use std::ops::Deref;
47use std::str::from_utf8;
48
49use crate::encoding::{Decoder, EncodingError};
50use crate::errors::{Error, IllFormedError};
51use crate::escape::{
52    escape, minimal_escape, partial_escape, resolve_predefined_entity, unescape_with,
53};
54use crate::name::{LocalName, QName};
55#[cfg(feature = "serialize")]
56use crate::utils::CowRef;
57use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string, Bytes};
58use attributes::{AttrError, Attribute, Attributes};
59
/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
///
/// The name can be accessed using the [`name`] or [`local_name`] methods.
/// An iterator over the attributes is returned by the [`attributes`] method.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<` and `>` or `/>`:
///
/// ```
/// # use quick_xml::events::{BytesStart, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// // Remember, that \ at the end of string literal strips
/// // all space characters to the first non-space character
/// let mut reader = Reader::from_str("\
///     <element a1 = 'val1' a2=\"val2\" />\
///     <element a1 = 'val1' a2=\"val2\" >"
/// );
/// let content = "element a1 = 'val1' a2=\"val2\" ";
/// let event = BytesStart::from_content(content, 7);
///
/// assert_eq!(reader.read_event().unwrap(), Event::Empty(event.borrow()));
/// assert_eq!(reader.read_event().unwrap(), Event::Start(event.borrow()));
/// // deref coercion of &BytesStart to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
/// [`attributes`]: Self::attributes
#[derive(Clone, Eq, PartialEq)]
pub struct BytesStart<'a> {
    /// Content of the element (the name followed by the raw attributes),
    /// before any UTF-8 conversion
    pub(crate) buf: Cow<'a, [u8]>,
    /// Length of the element name in bytes; the name starts at the start of
    /// `buf` and occupies `buf[..name_len]`
    pub(crate) name_len: usize,
}
99
100impl<'a> BytesStart<'a> {
101    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
102    #[inline]
103    pub(crate) const fn wrap(content: &'a [u8], name_len: usize) -> Self {
104        BytesStart {
105            buf: Cow::Borrowed(content),
106            name_len,
107        }
108    }
109
110    /// Creates a new `BytesStart` from the given name.
111    ///
112    /// # Warning
113    ///
114    /// `name` must be a valid name.
115    #[inline]
116    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
117        let buf = str_cow_to_bytes(name);
118        BytesStart {
119            name_len: buf.len(),
120            buf,
121        }
122    }
123
124    /// Creates a new `BytesStart` from the given content (name + attributes).
125    ///
126    /// # Warning
127    ///
128    /// `&content[..name_len]` must be a valid name, and the remainder of `content`
129    /// must be correctly-formed attributes. Neither are checked, it is possible
130    /// to generate invalid XML if `content` or `name_len` are incorrect.
131    #[inline]
132    pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
133        BytesStart {
134            buf: str_cow_to_bytes(content),
135            name_len,
136        }
137    }
138
139    /// Converts the event into an owned event.
140    pub fn into_owned(self) -> BytesStart<'static> {
141        BytesStart {
142            buf: Cow::Owned(self.buf.into_owned()),
143            name_len: self.name_len,
144        }
145    }
146
147    /// Converts the event into an owned event without taking ownership of Event
148    pub fn to_owned(&self) -> BytesStart<'static> {
149        BytesStart {
150            buf: Cow::Owned(self.buf.clone().into_owned()),
151            name_len: self.name_len,
152        }
153    }
154
155    /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
156    ///
157    /// # Example
158    ///
159    /// ```
160    /// use quick_xml::events::{BytesStart, Event};
161    /// # use quick_xml::writer::Writer;
162    /// # use quick_xml::Error;
163    ///
164    /// struct SomeStruct<'a> {
165    ///     attrs: BytesStart<'a>,
166    ///     // ...
167    /// }
168    /// # impl<'a> SomeStruct<'a> {
169    /// # fn example(&self) -> Result<(), Error> {
170    /// # let mut writer = Writer::new(Vec::new());
171    ///
172    /// writer.write_event(Event::Start(self.attrs.borrow()))?;
173    /// // ...
174    /// writer.write_event(Event::End(self.attrs.to_end()))?;
175    /// # Ok(())
176    /// # }}
177    /// ```
178    ///
179    /// [`to_end`]: Self::to_end
180    pub fn borrow(&self) -> BytesStart {
181        BytesStart {
182            buf: Cow::Borrowed(&self.buf),
183            name_len: self.name_len,
184        }
185    }
186
187    /// Creates new paired close tag
188    #[inline]
189    pub fn to_end(&self) -> BytesEnd {
190        BytesEnd::from(self.name())
191    }
192
193    /// Gets the undecoded raw tag name, as present in the input stream.
194    #[inline]
195    pub fn name(&self) -> QName {
196        QName(&self.buf[..self.name_len])
197    }
198
199    /// Gets the undecoded raw local tag name (excluding namespace) as present
200    /// in the input stream.
201    ///
202    /// All content up to and including the first `:` character is removed from the tag name.
203    #[inline]
204    pub fn local_name(&self) -> LocalName {
205        self.name().into()
206    }
207
208    /// Edit the name of the BytesStart in-place
209    ///
210    /// # Warning
211    ///
212    /// `name` must be a valid name.
213    pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
214        let bytes = self.buf.to_mut();
215        bytes.splice(..self.name_len, name.iter().cloned());
216        self.name_len = name.len();
217        self
218    }
219
220    /// Gets the undecoded raw tag name, as present in the input stream, which
221    /// is borrowed either to the input, or to the event.
222    ///
223    /// # Lifetimes
224    ///
225    /// - `'a`: Lifetime of the input data from which this event is borrow
226    /// - `'e`: Lifetime of the concrete event instance
227    // TODO: We should made this is a part of public API, but with safe wrapped for a name
228    #[cfg(feature = "serialize")]
229    pub(crate) fn raw_name<'e>(&'e self) -> CowRef<'a, 'e, [u8]> {
230        match self.buf {
231            Cow::Borrowed(b) => CowRef::Input(&b[..self.name_len]),
232            Cow::Owned(ref o) => CowRef::Slice(&o[..self.name_len]),
233        }
234    }
235}
236
237/// Attribute-related methods
238impl<'a> BytesStart<'a> {
239    /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
240    ///
241    /// The yielded items must be convertible to [`Attribute`] using `Into`.
242    pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
243    where
244        I: IntoIterator,
245        I::Item: Into<Attribute<'b>>,
246    {
247        self.extend_attributes(attributes);
248        self
249    }
250
251    /// Add additional attributes to this tag using an iterator.
252    ///
253    /// The yielded items must be convertible to [`Attribute`] using `Into`.
254    pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
255    where
256        I: IntoIterator,
257        I::Item: Into<Attribute<'b>>,
258    {
259        for attr in attributes {
260            self.push_attribute(attr);
261        }
262        self
263    }
264
265    /// Adds an attribute to this element.
266    pub fn push_attribute<'b, A>(&mut self, attr: A)
267    where
268        A: Into<Attribute<'b>>,
269    {
270        self.buf.to_mut().push(b' ');
271        self.push_attr(attr.into());
272    }
273
274    /// Remove all attributes from the ByteStart
275    pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
276        self.buf.to_mut().truncate(self.name_len);
277        self
278    }
279
280    /// Returns an iterator over the attributes of this tag.
281    pub fn attributes(&self) -> Attributes {
282        Attributes::wrap(&self.buf, self.name_len, false)
283    }
284
285    /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
286    pub fn html_attributes(&self) -> Attributes {
287        Attributes::wrap(&self.buf, self.name_len, true)
288    }
289
290    /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
291    /// including the whitespace after the tag name if there is any.
292    #[inline]
293    pub fn attributes_raw(&self) -> &[u8] {
294        &self.buf[self.name_len..]
295    }
296
297    /// Try to get an attribute
298    pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
299        &'a self,
300        attr_name: N,
301    ) -> Result<Option<Attribute<'a>>, AttrError> {
302        for a in self.attributes().with_checks(false) {
303            let a = a?;
304            if a.key.as_ref() == attr_name.as_ref() {
305                return Ok(Some(a));
306            }
307        }
308        Ok(None)
309    }
310
311    /// Adds an attribute to this element.
312    pub(crate) fn push_attr<'b>(&mut self, attr: Attribute<'b>) {
313        let bytes = self.buf.to_mut();
314        bytes.extend_from_slice(attr.key.as_ref());
315        bytes.extend_from_slice(b"=\"");
316        // FIXME: need to escape attribute content
317        bytes.extend_from_slice(attr.value.as_ref());
318        bytes.push(b'"');
319    }
320
321    /// Adds new line in existing element
322    pub(crate) fn push_newline(&mut self) {
323        self.buf.to_mut().push(b'\n');
324    }
325
326    /// Adds indentation bytes in existing element
327    pub(crate) fn push_indent(&mut self, indent: &[u8]) {
328        self.buf.to_mut().extend_from_slice(indent);
329    }
330}
331
332impl<'a> Debug for BytesStart<'a> {
333    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
334        write!(f, "BytesStart {{ buf: ")?;
335        write_cow_string(f, &self.buf)?;
336        write!(f, ", name_len: {} }}", self.name_len)
337    }
338}
339
340impl<'a> Deref for BytesStart<'a> {
341    type Target = [u8];
342
343    fn deref(&self) -> &[u8] {
344        &self.buf
345    }
346}
347
348impl<'a> From<QName<'a>> for BytesStart<'a> {
349    #[inline]
350    fn from(name: QName<'a>) -> Self {
351        let name = name.into_inner();
352        Self::wrap(name, name.len())
353    }
354}
355
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
    /// Generates a start tag with a non-empty alphanumeric name and an
    /// arbitrary list of string attributes.
    ///
    /// Inputs whose name is empty or contains a non-alphanumeric character
    /// are rejected with `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let s = <&str>::arbitrary(u)?;
        if s.is_empty() || !s.chars().all(char::is_alphanumeric) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        let mut result = Self::new(s);
        // `Vec` is already `IntoIterator`, no explicit `.into_iter()` needed
        result.extend_attributes(Vec::<(&str, &str)>::arbitrary(u)?);
        Ok(result)
    }

    /// Delegates to the size hint of `&str` (the name).
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
372////////////////////////////////////////////////////////////////////////////////////////////////////
373
/// Closing tag data (`Event::End`): `</name>`.
///
/// The name can be accessed using the [`name`] or [`local_name`] methods.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `</` and `>`.
///
/// Note, that inner text will not contain `>` character inside:
///
/// ```
/// # use quick_xml::events::{BytesEnd, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str(r#"<element></element a1 = 'val1' a2="val2" >"#);
/// // Note, that this entire string considered as a .name()
/// let content = "element a1 = 'val1' a2=\"val2\" ";
/// let event = BytesEnd::new(content);
///
/// reader.config_mut().trim_markup_names_in_closing_tags = false;
/// reader.config_mut().check_end_names = false;
/// reader.read_event().unwrap(); // Skip `<element>`
///
/// assert_eq!(reader.read_event().unwrap(), Event::End(event.borrow()));
/// assert_eq!(event.name().as_ref(), content.as_bytes());
/// // deref coercion of &BytesEnd to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
#[derive(Clone, Eq, PartialEq)]
pub struct BytesEnd<'a> {
    /// Undecoded bytes of the tag; this is what [`name`](Self::name) and
    /// `deref()` return
    name: Cow<'a, [u8]>,
}
410
411impl<'a> BytesEnd<'a> {
412    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
413    #[inline]
414    pub(crate) const fn wrap(name: Cow<'a, [u8]>) -> Self {
415        BytesEnd { name }
416    }
417
418    /// Creates a new `BytesEnd` borrowing a slice.
419    ///
420    /// # Warning
421    ///
422    /// `name` must be a valid name.
423    #[inline]
424    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
425        Self::wrap(str_cow_to_bytes(name))
426    }
427
428    /// Converts the event into an owned event.
429    pub fn into_owned(self) -> BytesEnd<'static> {
430        BytesEnd {
431            name: Cow::Owned(self.name.into_owned()),
432        }
433    }
434
435    /// Converts the event into a borrowed event.
436    #[inline]
437    pub fn borrow(&self) -> BytesEnd {
438        BytesEnd {
439            name: Cow::Borrowed(&self.name),
440        }
441    }
442
443    /// Gets the undecoded raw tag name, as present in the input stream.
444    #[inline]
445    pub fn name(&self) -> QName {
446        QName(&self.name)
447    }
448
449    /// Gets the undecoded raw local tag name (excluding namespace) as present
450    /// in the input stream.
451    ///
452    /// All content up to and including the first `:` character is removed from the tag name.
453    #[inline]
454    pub fn local_name(&self) -> LocalName {
455        self.name().into()
456    }
457}
458
459impl<'a> Debug for BytesEnd<'a> {
460    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
461        write!(f, "BytesEnd {{ name: ")?;
462        write_cow_string(f, &self.name)?;
463        write!(f, " }}")
464    }
465}
466
467impl<'a> Deref for BytesEnd<'a> {
468    type Target = [u8];
469
470    fn deref(&self) -> &[u8] {
471        &self.name
472    }
473}
474
475impl<'a> From<QName<'a>> for BytesEnd<'a> {
476    #[inline]
477    fn from(name: QName<'a>) -> Self {
478        Self::wrap(name.into_inner().into())
479    }
480}
481
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    /// Generates a closing tag whose name is an arbitrary string.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
491
492////////////////////////////////////////////////////////////////////////////////////////////////////
493
/// Data from various events (most notably, `Event::Text`) that is stored in XML
/// in escaped form. Internally the data is also kept in escaped form.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event. In case of comment this is everything
/// between `<!--` and `-->` and the text of comment will not contain `-->` inside.
/// In case of DTD this is everything between `<!DOCTYPE` + spaces and closing `>`
/// (i.e. in case of DTD the first character is never space):
///
/// ```
/// # use quick_xml::events::{BytesText, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// // Remember, that \ at the end of string literal strips
/// // all space characters to the first non-space character
/// let mut reader = Reader::from_str("\
///     <!DOCTYPE comment or text >\
///     comment or text \
///     <!--comment or text -->"
/// );
/// let content = "comment or text ";
/// let event = BytesText::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::DocType(event.borrow()));
/// assert_eq!(reader.read_event().unwrap(), Event::Text(event.borrow()));
/// assert_eq!(reader.read_event().unwrap(), Event::Comment(event.borrow()));
/// // deref coercion of &BytesText to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesText<'a> {
    /// Escaped then encoded content of the event. Content is encoded in the XML
    /// document encoding when the event comes from the reader and should be in the
    /// document encoding when the event is passed to the writer
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
534
535impl<'a> BytesText<'a> {
536    /// Creates a new `BytesText` from an escaped byte sequence in the specified encoding.
537    #[inline]
538    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
539        Self {
540            content: content.into(),
541            decoder,
542        }
543    }
544
545    /// Creates a new `BytesText` from an escaped string.
546    #[inline]
547    pub fn from_escaped<C: Into<Cow<'a, str>>>(content: C) -> Self {
548        Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
549    }
550
551    /// Creates a new `BytesText` from a string. The string is expected not to
552    /// be escaped.
553    #[inline]
554    pub fn new(content: &'a str) -> Self {
555        Self::from_escaped(escape(content))
556    }
557
558    /// Ensures that all data is owned to extend the object's lifetime if
559    /// necessary.
560    #[inline]
561    pub fn into_owned(self) -> BytesText<'static> {
562        BytesText {
563            content: self.content.into_owned().into(),
564            decoder: self.decoder,
565        }
566    }
567
568    /// Extracts the inner `Cow` from the `BytesText` event container.
569    #[inline]
570    pub fn into_inner(self) -> Cow<'a, [u8]> {
571        self.content
572    }
573
574    /// Converts the event into a borrowed event.
575    #[inline]
576    pub fn borrow(&self) -> BytesText {
577        BytesText {
578            content: Cow::Borrowed(&self.content),
579            decoder: self.decoder,
580        }
581    }
582
583    /// Decodes then unescapes the content of the event.
584    ///
585    /// This will allocate if the value contains any escape sequences or in
586    /// non-UTF-8 encoding.
587    pub fn unescape(&self) -> Result<Cow<'a, str>, Error> {
588        self.unescape_with(resolve_predefined_entity)
589    }
590
591    /// Decodes then unescapes the content of the event with custom entities.
592    ///
593    /// This will allocate if the value contains any escape sequences or in
594    /// non-UTF-8 encoding.
595    pub fn unescape_with<'entity>(
596        &self,
597        resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
598    ) -> Result<Cow<'a, str>, Error> {
599        let decoded = self.decoder.decode_cow(&self.content)?;
600
601        match unescape_with(&decoded, resolve_entity)? {
602            // Because result is borrowed, no replacements was done and we can use original string
603            Cow::Borrowed(_) => Ok(decoded),
604            Cow::Owned(s) => Ok(s.into()),
605        }
606    }
607
608    /// Removes leading XML whitespace bytes from text content.
609    ///
610    /// Returns `true` if content is empty after that
611    pub fn inplace_trim_start(&mut self) -> bool {
612        self.content = trim_cow(
613            replace(&mut self.content, Cow::Borrowed(b"")),
614            trim_xml_start,
615        );
616        self.content.is_empty()
617    }
618
619    /// Removes trailing XML whitespace bytes from text content.
620    ///
621    /// Returns `true` if content is empty after that
622    pub fn inplace_trim_end(&mut self) -> bool {
623        self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
624        self.content.is_empty()
625    }
626}
627
628impl<'a> Debug for BytesText<'a> {
629    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
630        write!(f, "BytesText {{ content: ")?;
631        write_cow_string(f, &self.content)?;
632        write!(f, " }}")
633    }
634}
635
636impl<'a> Deref for BytesText<'a> {
637    type Target = [u8];
638
639    fn deref(&self) -> &[u8] {
640        &self.content
641    }
642}
643
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
    /// Generates a text event from an arbitrary alphanumeric string.
    ///
    /// Inputs containing a non-alphanumeric character are rejected with
    /// `IncorrectFormat`; the empty string is accepted.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let s = <&str>::arbitrary(u)?;
        if !s.chars().all(char::is_alphanumeric) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        Ok(Self::new(s))
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
658
659////////////////////////////////////////////////////////////////////////////////////////////////////
660
/// CDATA content contains unescaped data from the reader. If you want to write them as a text,
/// [convert](Self::escape) it to [`BytesText`].
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<![CDATA[` and `]]>`.
///
/// Note, that inner text will not contain `]]>` sequence inside:
///
/// ```
/// # use quick_xml::events::{BytesCData, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<![CDATA[ CDATA section ]]>");
/// let content = " CDATA section ";
/// let event = BytesCData::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::CData(event.borrow()));
/// // deref coercion of &BytesCData to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesCData<'a> {
    /// Unescaped content of the CDATA section, in the encoding described by `decoder`
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
689
690impl<'a> BytesCData<'a> {
691    /// Creates a new `BytesCData` from a byte sequence in the specified encoding.
692    #[inline]
693    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
694        Self {
695            content: content.into(),
696            decoder,
697        }
698    }
699
700    /// Creates a new `BytesCData` from a string.
701    ///
702    /// # Warning
703    ///
704    /// `content` must not contain the `]]>` sequence. You can use
705    /// [`BytesCData::escaped`] to escape the content instead.
706    #[inline]
707    pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
708        Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
709    }
710
711    /// Creates an iterator of `BytesCData` from a string.
712    ///
713    /// If a string contains `]]>`, it needs to be split into multiple `CDATA`
714    /// sections, splitting the `]]` and `>` characters, because the CDATA closing
715    /// sequence cannot be escaped. This iterator yields a `BytesCData` instance
716    /// for each of those sections.
717    ///
718    /// # Examples
719    ///
720    /// ```
721    /// # use quick_xml::events::BytesCData;
722    /// # use pretty_assertions::assert_eq;
723    /// let content = "";
724    /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
725    /// assert_eq!(cdata, &[BytesCData::new("")]);
726    ///
727    /// let content = "Certain tokens like ]]> can be difficult and <invalid>";
728    /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
729    /// assert_eq!(cdata, &[
730    ///     BytesCData::new("Certain tokens like ]]"),
731    ///     BytesCData::new("> can be difficult and <invalid>"),
732    /// ]);
733    ///
734    /// let content = "foo]]>bar]]>baz]]>quux";
735    /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
736    /// assert_eq!(cdata, &[
737    ///     BytesCData::new("foo]]"),
738    ///     BytesCData::new(">bar]]"),
739    ///     BytesCData::new(">baz]]"),
740    ///     BytesCData::new(">quux"),
741    /// ]);
742    /// ```
743    #[inline]
744    pub fn escaped(content: &'a str) -> CDataIterator<'a> {
745        CDataIterator {
746            unprocessed: content.as_bytes(),
747            finished: false,
748        }
749    }
750
751    /// Ensures that all data is owned to extend the object's lifetime if
752    /// necessary.
753    #[inline]
754    pub fn into_owned(self) -> BytesCData<'static> {
755        BytesCData {
756            content: self.content.into_owned().into(),
757            decoder: self.decoder,
758        }
759    }
760
761    /// Extracts the inner `Cow` from the `BytesCData` event container.
762    #[inline]
763    pub fn into_inner(self) -> Cow<'a, [u8]> {
764        self.content
765    }
766
767    /// Converts the event into a borrowed event.
768    #[inline]
769    pub fn borrow(&self) -> BytesCData {
770        BytesCData {
771            content: Cow::Borrowed(&self.content),
772            decoder: self.decoder,
773        }
774    }
775
776    /// Converts this CDATA content to an escaped version, that can be written
777    /// as an usual text in XML.
778    ///
779    /// This function performs following replacements:
780    ///
781    /// | Character | Replacement
782    /// |-----------|------------
783    /// | `<`       | `&lt;`
784    /// | `>`       | `&gt;`
785    /// | `&`       | `&amp;`
786    /// | `'`       | `&apos;`
787    /// | `"`       | `&quot;`
788    pub fn escape(self) -> Result<BytesText<'a>, EncodingError> {
789        let decoded = self.decode()?;
790        Ok(BytesText::wrap(
791            match escape(decoded) {
792                Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
793                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
794            },
795            Decoder::utf8(),
796        ))
797    }
798
799    /// Converts this CDATA content to an escaped version, that can be written
800    /// as an usual text in XML.
801    ///
802    /// In XML text content, it is allowed (though not recommended) to leave
803    /// the quote special characters `"` and `'` unescaped.
804    ///
805    /// This function performs following replacements:
806    ///
807    /// | Character | Replacement
808    /// |-----------|------------
809    /// | `<`       | `&lt;`
810    /// | `>`       | `&gt;`
811    /// | `&`       | `&amp;`
812    pub fn partial_escape(self) -> Result<BytesText<'a>, EncodingError> {
813        let decoded = self.decode()?;
814        Ok(BytesText::wrap(
815            match partial_escape(decoded) {
816                Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
817                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
818            },
819            Decoder::utf8(),
820        ))
821    }
822
823    /// Converts this CDATA content to an escaped version, that can be written
824    /// as an usual text in XML. This method escapes only those characters that
825    /// must be escaped according to the [specification].
826    ///
827    /// This function performs following replacements:
828    ///
829    /// | Character | Replacement
830    /// |-----------|------------
831    /// | `<`       | `&lt;`
832    /// | `&`       | `&amp;`
833    ///
834    /// [specification]: https://www.w3.org/TR/xml11/#syntax
835    pub fn minimal_escape(self) -> Result<BytesText<'a>, EncodingError> {
836        let decoded = self.decode()?;
837        Ok(BytesText::wrap(
838            match minimal_escape(decoded) {
839                Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
840                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
841            },
842            Decoder::utf8(),
843        ))
844    }
845
846    /// Gets content of this text buffer in the specified encoding
847    pub(crate) fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
848        Ok(self.decoder.decode_cow(&self.content)?)
849    }
850}
851
852impl<'a> Debug for BytesCData<'a> {
853    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
854        write!(f, "BytesCData {{ content: ")?;
855        write_cow_string(f, &self.content)?;
856        write!(f, " }}")
857    }
858}
859
860impl<'a> Deref for BytesCData<'a> {
861    type Target = [u8];
862
863    fn deref(&self) -> &[u8] {
864        &self.content
865    }
866}
867
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    /// Generates a CDATA event whose content is an arbitrary string.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
877
/// Iterator over `CDATA` sections in a string.
///
/// This iterator is created by the [`BytesCData::escaped`] method.
#[derive(Clone)]
pub struct CDataIterator<'a> {
    /// The unprocessed data which should be emitted as `BytesCData` events.
    /// At each iteration, the processed data is cut from this slice.
    unprocessed: &'a [u8],
    /// Set when the last section has been emitted; once set, `next` returns `None`
    finished: bool,
}
888
889impl<'a> Debug for CDataIterator<'a> {
890    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
891        f.debug_struct("CDataIterator")
892            .field("unprocessed", &Bytes(self.unprocessed))
893            .field("finished", &self.finished)
894            .finish()
895    }
896}
897
898impl<'a> Iterator for CDataIterator<'a> {
899    type Item = BytesCData<'a>;
900
901    fn next(&mut self) -> Option<BytesCData<'a>> {
902        if self.finished {
903            return None;
904        }
905
906        for gt in memchr::memchr_iter(b'>', self.unprocessed) {
907            if self.unprocessed[..gt].ends_with(b"]]") {
908                let (slice, rest) = self.unprocessed.split_at(gt);
909                self.unprocessed = rest;
910                return Some(BytesCData::wrap(slice, Decoder::utf8()));
911            }
912        }
913
914        self.finished = true;
915        Some(BytesCData::wrap(self.unprocessed, Decoder::utf8()))
916    }
917}
918
// Once the `finished` flag is set, `next` returns `None` on every further
// call, which is exactly what `FusedIterator` promises.
impl FusedIterator for CDataIterator<'_> {}
920
921////////////////////////////////////////////////////////////////////////////////////////////////////
922
/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<?` and `?>`.
///
/// Note that the inner text never contains the `?>` sequence:
///
/// ```
/// # use quick_xml::events::{BytesPI, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<?processing instruction >:-<~ ?>");
/// let content = "processing instruction >:-<~ ";
/// let event = BytesPI::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::PI(event.borrow()));
/// // deref coercion of &BytesPI to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [PI]: https://www.w3.org/TR/xml11/#sec-pi
#[derive(Clone, Eq, PartialEq)]
pub struct BytesPI<'a> {
    /// Whole text of the instruction. The name part of the inner `BytesStart`
    /// is the PI target, everything after it is the PI content.
    content: BytesStart<'a>,
}
950
951impl<'a> BytesPI<'a> {
952    /// Creates a new `BytesPI` from a byte sequence in the specified encoding.
953    #[inline]
954    pub(crate) const fn wrap(content: &'a [u8], target_len: usize) -> Self {
955        Self {
956            content: BytesStart::wrap(content, target_len),
957        }
958    }
959
960    /// Creates a new `BytesPI` from a string.
961    ///
962    /// # Warning
963    ///
964    /// `content` must not contain the `?>` sequence.
965    #[inline]
966    pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
967        let buf = str_cow_to_bytes(content);
968        let name_len = name_len(&buf);
969        Self {
970            content: BytesStart { buf, name_len },
971        }
972    }
973
974    /// Ensures that all data is owned to extend the object's lifetime if
975    /// necessary.
976    #[inline]
977    pub fn into_owned(self) -> BytesPI<'static> {
978        BytesPI {
979            content: self.content.into_owned().into(),
980        }
981    }
982
983    /// Extracts the inner `Cow` from the `BytesPI` event container.
984    #[inline]
985    pub fn into_inner(self) -> Cow<'a, [u8]> {
986        self.content.buf
987    }
988
989    /// Converts the event into a borrowed event.
990    #[inline]
991    pub fn borrow(&self) -> BytesPI {
992        BytesPI {
993            content: self.content.borrow(),
994        }
995    }
996
997    /// A target used to identify the application to which the instruction is directed.
998    ///
999    /// # Example
1000    ///
1001    /// ```
1002    /// # use pretty_assertions::assert_eq;
1003    /// use quick_xml::events::BytesPI;
1004    ///
1005    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1006    /// assert_eq!(instruction.target(), b"xml-stylesheet");
1007    /// ```
1008    #[inline]
1009    pub fn target(&self) -> &[u8] {
1010        self.content.name().0
1011    }
1012
1013    /// Content of the processing instruction. Contains everything between target
1014    /// name and the end of the instruction. A direct consequence is that the first
1015    /// character is always a space character.
1016    ///
1017    /// # Example
1018    ///
1019    /// ```
1020    /// # use pretty_assertions::assert_eq;
1021    /// use quick_xml::events::BytesPI;
1022    ///
1023    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1024    /// assert_eq!(instruction.content(), br#" href="style.css""#);
1025    /// ```
1026    #[inline]
1027    pub fn content(&self) -> &[u8] {
1028        self.content.attributes_raw()
1029    }
1030
1031    /// A view of the processing instructions' content as a list of key-value pairs.
1032    ///
1033    /// Key-value pairs are used in some processing instructions, for example in
1034    /// `<?xml-stylesheet?>`.
1035    ///
1036    /// Returned iterator does not validate attribute values as may required by
1037    /// target's rules. For example, it doesn't check that substring `?>` is not
1038    /// present in the attribute value. That shouldn't be the problem when event
1039    /// is produced by the reader, because reader detects end of processing instruction
1040    /// by the first `?>` sequence, as required by the specification, and therefore
1041    /// this sequence cannot appear inside it.
1042    ///
1043    /// # Example
1044    ///
1045    /// ```
1046    /// # use pretty_assertions::assert_eq;
1047    /// use std::borrow::Cow;
1048    /// use quick_xml::events::attributes::Attribute;
1049    /// use quick_xml::events::BytesPI;
1050    /// use quick_xml::name::QName;
1051    ///
1052    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1053    /// for attr in instruction.attributes() {
1054    ///     assert_eq!(attr, Ok(Attribute {
1055    ///         key: QName(b"href"),
1056    ///         value: Cow::Borrowed(b"style.css"),
1057    ///     }));
1058    /// }
1059    /// ```
1060    #[inline]
1061    pub fn attributes(&self) -> Attributes {
1062        self.content.attributes()
1063    }
1064}
1065
1066impl<'a> Debug for BytesPI<'a> {
1067    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1068        write!(f, "BytesPI {{ content: ")?;
1069        write_cow_string(f, &self.content.buf)?;
1070        write!(f, " }}")
1071    }
1072}
1073
1074impl<'a> Deref for BytesPI<'a> {
1075    type Target = [u8];
1076
1077    fn deref(&self) -> &[u8] {
1078        &self.content
1079    }
1080}
1081
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> {
    /// Builds an instruction from an arbitrary string slice taken from `u`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str>::arbitrary(u)?;
        Ok(Self::new(text))
    }

    /// The event is built from a single `&str`, so the hint of `&str` is reused.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1091
1092////////////////////////////////////////////////////////////////////////////////////////////////////
1093
/// An XML declaration (`Event::Decl`).
///
/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<?` and `?>`.
///
/// Note that the inner text never contains the `?>` sequence:
///
/// ```
/// # use quick_xml::events::{BytesDecl, BytesStart, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<?xml version = '1.0' ?>");
/// let content = "xml version = '1.0' ";
/// let event = BytesDecl::from_start(BytesStart::from_content(content, 3));
///
/// assert_eq!(reader.read_event().unwrap(), Event::Decl(event.borrow()));
/// // deref coercion of &BytesDecl to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct BytesDecl<'a> {
    /// Text of the declaration, kept as a `BytesStart` so that the
    /// pseudo-attributes (`version`, `encoding`, `standalone`) can be read
    /// through its attribute accessors.
    content: BytesStart<'a>,
}
1121
1122impl<'a> BytesDecl<'a> {
1123    /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
1124    /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
1125    /// attribute.
1126    ///
1127    /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
1128    /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
1129    /// the double quote character is not allowed in any of the attribute values.
1130    pub fn new(
1131        version: &str,
1132        encoding: Option<&str>,
1133        standalone: Option<&str>,
1134    ) -> BytesDecl<'static> {
1135        // Compute length of the buffer based on supplied attributes
1136        // ' encoding=""'   => 12
1137        let encoding_attr_len = if let Some(xs) = encoding {
1138            12 + xs.len()
1139        } else {
1140            0
1141        };
1142        // ' standalone=""' => 14
1143        let standalone_attr_len = if let Some(xs) = standalone {
1144            14 + xs.len()
1145        } else {
1146            0
1147        };
1148        // 'xml version=""' => 14
1149        let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len);
1150
1151        buf.push_str("xml version=\"");
1152        buf.push_str(version);
1153
1154        if let Some(encoding_val) = encoding {
1155            buf.push_str("\" encoding=\"");
1156            buf.push_str(encoding_val);
1157        }
1158
1159        if let Some(standalone_val) = standalone {
1160            buf.push_str("\" standalone=\"");
1161            buf.push_str(standalone_val);
1162        }
1163        buf.push('"');
1164
1165        BytesDecl {
1166            content: BytesStart::from_content(buf, 3),
1167        }
1168    }
1169
1170    /// Creates a `BytesDecl` from a `BytesStart`
1171    pub const fn from_start(start: BytesStart<'a>) -> Self {
1172        Self { content: start }
1173    }
1174
1175    /// Gets xml version, excluding quotes (`'` or `"`).
1176    ///
1177    /// According to the [grammar], the version *must* be the first thing in the declaration.
1178    /// This method tries to extract the first thing in the declaration and return it.
1179    /// In case of multiple attributes value of the first one is returned.
1180    ///
1181    /// If version is missed in the declaration, or the first thing is not a version,
1182    /// [`IllFormedError::MissingDeclVersion`] will be returned.
1183    ///
1184    /// # Examples
1185    ///
1186    /// ```
1187    /// use quick_xml::errors::{Error, IllFormedError};
1188    /// use quick_xml::events::{BytesDecl, BytesStart};
1189    ///
1190    /// // <?xml version='1.1'?>
1191    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1192    /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref());
1193    ///
1194    /// // <?xml version='1.0' version='1.1'?>
1195    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
1196    /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref());
1197    ///
1198    /// // <?xml encoding='utf-8'?>
1199    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1200    /// match decl.version() {
1201    ///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1202    ///     _ => assert!(false),
1203    /// }
1204    ///
1205    /// // <?xml encoding='utf-8' version='1.1'?>
1206    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
1207    /// match decl.version() {
1208    ///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1209    ///     _ => assert!(false),
1210    /// }
1211    ///
1212    /// // <?xml?>
1213    /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
1214    /// match decl.version() {
1215    ///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
1216    ///     _ => assert!(false),
1217    /// }
1218    /// ```
1219    ///
1220    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1221    pub fn version(&self) -> Result<Cow<[u8]>, Error> {
1222        // The version *must* be the first thing in the declaration.
1223        match self.content.attributes().with_checks(false).next() {
1224            Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
1225            // first attribute was not "version"
1226            Some(Ok(a)) => {
1227                let found = from_utf8(a.key.as_ref())
1228                    .map_err(|_| IllFormedError::MissingDeclVersion(None))?
1229                    .to_string();
1230                Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(
1231                    found,
1232                ))))
1233            }
1234            // error parsing attributes
1235            Some(Err(e)) => Err(e.into()),
1236            // no attributes
1237            None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))),
1238        }
1239    }
1240
1241    /// Gets xml encoding, excluding quotes (`'` or `"`).
1242    ///
1243    /// Although according to the [grammar] encoding must appear before `"standalone"`
1244    /// and after `"version"`, this method does not check that. The first occurrence
1245    /// of the attribute will be returned even if there are several. Also, method does
1246    /// not restrict symbols that can forming the encoding, so the returned encoding
1247    /// name may not correspond to the grammar.
1248    ///
1249    /// # Examples
1250    ///
1251    /// ```
1252    /// use std::borrow::Cow;
1253    /// use quick_xml::Error;
1254    /// use quick_xml::events::{BytesDecl, BytesStart};
1255    ///
1256    /// // <?xml version='1.1'?>
1257    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1258    /// assert!(decl.encoding().is_none());
1259    ///
1260    /// // <?xml encoding='utf-8'?>
1261    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1262    /// match decl.encoding() {
1263    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"),
1264    ///     _ => assert!(false),
1265    /// }
1266    ///
1267    /// // <?xml encoding='something_WRONG' encoding='utf-8'?>
1268    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0));
1269    /// match decl.encoding() {
1270    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"),
1271    ///     _ => assert!(false),
1272    /// }
1273    /// ```
1274    ///
1275    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1276    pub fn encoding(&self) -> Option<Result<Cow<[u8]>, AttrError>> {
1277        self.content
1278            .try_get_attribute("encoding")
1279            .map(|a| a.map(|a| a.value))
1280            .transpose()
1281    }
1282
1283    /// Gets xml standalone, excluding quotes (`'` or `"`).
1284    ///
1285    /// Although according to the [grammar] standalone flag must appear after `"version"`
1286    /// and `"encoding"`, this method does not check that. The first occurrence of the
1287    /// attribute will be returned even if there are several. Also, method does not
1288    /// restrict symbols that can forming the value, so the returned flag name may not
1289    /// correspond to the grammar.
1290    ///
1291    /// # Examples
1292    ///
1293    /// ```
1294    /// use std::borrow::Cow;
1295    /// use quick_xml::Error;
1296    /// use quick_xml::events::{BytesDecl, BytesStart};
1297    ///
1298    /// // <?xml version='1.1'?>
1299    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1300    /// assert!(decl.standalone().is_none());
1301    ///
1302    /// // <?xml standalone='yes'?>
1303    /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0));
1304    /// match decl.standalone() {
1305    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"),
1306    ///     _ => assert!(false),
1307    /// }
1308    ///
1309    /// // <?xml standalone='something_WRONG' encoding='utf-8'?>
1310    /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0));
1311    /// match decl.standalone() {
1312    ///     Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"),
1313    ///     _ => assert!(false),
1314    /// }
1315    /// ```
1316    ///
1317    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1318    pub fn standalone(&self) -> Option<Result<Cow<[u8]>, AttrError>> {
1319        self.content
1320            .try_get_attribute("standalone")
1321            .map(|a| a.map(|a| a.value))
1322            .transpose()
1323    }
1324
1325    /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
1326    /// algorithm.
1327    ///
1328    /// If encoding in not known, or `encoding` key was not found, returns `None`.
1329    /// In case of duplicated `encoding` key, encoding, corresponding to the first
1330    /// one, is returned.
1331    #[cfg(feature = "encoding")]
1332    pub fn encoder(&self) -> Option<&'static Encoding> {
1333        self.encoding()
1334            .and_then(|e| e.ok())
1335            .and_then(|e| Encoding::for_label(&e))
1336    }
1337
1338    /// Converts the event into an owned event.
1339    pub fn into_owned(self) -> BytesDecl<'static> {
1340        BytesDecl {
1341            content: self.content.into_owned(),
1342        }
1343    }
1344
1345    /// Converts the event into a borrowed event.
1346    #[inline]
1347    pub fn borrow(&self) -> BytesDecl {
1348        BytesDecl {
1349            content: self.content.borrow(),
1350        }
1351    }
1352}
1353
1354impl<'a> Deref for BytesDecl<'a> {
1355    type Target = [u8];
1356
1357    fn deref(&self) -> &[u8] {
1358        &self.content
1359    }
1360}
1361
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    /// Builds a declaration from an arbitrary version string and two
    /// arbitrary optional strings (encoding and standalone), taken from `u`
    /// in that order.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let version = <&str>::arbitrary(u)?;
        let encoding = Option::<&str>::arbitrary(u)?;
        let standalone = Option::<&str>::arbitrary(u)?;
        Ok(Self::new(version, encoding, standalone))
    }

    /// Reports the size hint of a single `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1376
1377////////////////////////////////////////////////////////////////////////////////////////////////////
1378
/// Event emitted by [`Reader::read_event_into`].
///
/// Every variant except [`Event::Eof`] wraps the bytes of the corresponding
/// piece of the document; those bytes are reachable through `Deref<Target = [u8]>`.
///
/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Event<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Empty element tag (with attributes) `<tag attr="value" />`.
    Empty(BytesStart<'a>),
    /// Escaped character data between tags.
    Text(BytesText<'a>),
    /// Unescaped character data stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// Comment `<!-- ... -->`.
    Comment(BytesText<'a>),
    /// XML declaration `<?xml ...?>`.
    Decl(BytesDecl<'a>),
    /// Processing instruction `<?...?>`.
    PI(BytesPI<'a>),
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
    DocType(BytesText<'a>),
    /// End of XML document. Carries no data.
    Eof,
}
1406
1407impl<'a> Event<'a> {
1408    /// Converts the event to an owned version, untied to the lifetime of
1409    /// buffer used when reading but incurring a new, separate allocation.
1410    pub fn into_owned(self) -> Event<'static> {
1411        match self {
1412            Event::Start(e) => Event::Start(e.into_owned()),
1413            Event::End(e) => Event::End(e.into_owned()),
1414            Event::Empty(e) => Event::Empty(e.into_owned()),
1415            Event::Text(e) => Event::Text(e.into_owned()),
1416            Event::Comment(e) => Event::Comment(e.into_owned()),
1417            Event::CData(e) => Event::CData(e.into_owned()),
1418            Event::Decl(e) => Event::Decl(e.into_owned()),
1419            Event::PI(e) => Event::PI(e.into_owned()),
1420            Event::DocType(e) => Event::DocType(e.into_owned()),
1421            Event::Eof => Event::Eof,
1422        }
1423    }
1424
1425    /// Converts the event into a borrowed event.
1426    #[inline]
1427    pub fn borrow(&self) -> Event {
1428        match self {
1429            Event::Start(e) => Event::Start(e.borrow()),
1430            Event::End(e) => Event::End(e.borrow()),
1431            Event::Empty(e) => Event::Empty(e.borrow()),
1432            Event::Text(e) => Event::Text(e.borrow()),
1433            Event::Comment(e) => Event::Comment(e.borrow()),
1434            Event::CData(e) => Event::CData(e.borrow()),
1435            Event::Decl(e) => Event::Decl(e.borrow()),
1436            Event::PI(e) => Event::PI(e.borrow()),
1437            Event::DocType(e) => Event::DocType(e.borrow()),
1438            Event::Eof => Event::Eof,
1439        }
1440    }
1441}
1442
1443impl<'a> Deref for Event<'a> {
1444    type Target = [u8];
1445
1446    fn deref(&self) -> &[u8] {
1447        match *self {
1448            Event::Start(ref e) | Event::Empty(ref e) => e,
1449            Event::End(ref e) => e,
1450            Event::Text(ref e) => e,
1451            Event::Decl(ref e) => e,
1452            Event::PI(ref e) => e,
1453            Event::CData(ref e) => e,
1454            Event::Comment(ref e) => e,
1455            Event::DocType(ref e) => e,
1456            Event::Eof => &[],
1457        }
1458    }
1459}
1460
/// Identity conversion: lets code that is generic over `AsRef<Event>` accept
/// an `Event` itself.
impl<'a> AsRef<Event<'a>> for Event<'a> {
    fn as_ref(&self) -> &Event<'a> {
        self
    }
}
1466
1467////////////////////////////////////////////////////////////////////////////////////////////////////
1468
/// Reinterprets a string-like value as its UTF-8 bytes without copying:
/// a borrowed string stays borrowed and an owned one reuses its allocation.
#[inline]
fn str_cow_to_bytes<'a, C: Into<Cow<'a, str>>>(content: C) -> Cow<'a, [u8]> {
    let text: Cow<'a, str> = content.into();
    match text {
        Cow::Owned(owned) => Cow::Owned(owned.into_bytes()),
        Cow::Borrowed(borrowed) => Cow::Borrowed(borrowed.as_bytes()),
    }
}
1476
/// Applies `trim` to the bytes held in `value`, preserving the kind of the `Cow`.
///
/// A borrowed value is simply re-sliced. An owned value keeps its buffer when
/// `trim` did not shorten it; otherwise the trimmed part is copied into a new
/// vector.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    match value {
        Cow::Borrowed(bytes) => Cow::Borrowed(trim(bytes)),
        Cow::Owned(bytes) => {
            let trimmed = trim(&bytes);
            if trimmed.len() == bytes.len() {
                // Nothing was cut off, so the original allocation is reused
                Cow::Owned(bytes)
            } else {
                Cow::Owned(trimmed.to_vec())
            }
        }
    }
}
1492
/// Unit tests for the construction and mutation of `BytesStart`.
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A freshly created start tag consists only of its name.
    #[test]
    fn bytestart_create() {
        let b = BytesStart::new("test");
        assert_eq!(b.len(), 4);
        assert_eq!(b.name(), QName(b"test"));
    }

    /// Renaming a tag must keep the already pushed attributes.
    #[test]
    fn bytestart_set_name() {
        let mut b = BytesStart::new("test");
        assert_eq!(b.len(), 4);
        assert_eq!(b.name(), QName(b"test"));
        assert_eq!(b.attributes_raw(), b"");
        b.push_attribute(("x", "a"));
        // 4 bytes of the name + 6 bytes of ` x="a"`
        assert_eq!(b.len(), 10);
        assert_eq!(b.attributes_raw(), b" x=\"a\"");
        b.set_name(b"g");
        // 1 byte of the new name + the same 6 bytes of the attribute
        assert_eq!(b.len(), 7);
        assert_eq!(b.name(), QName(b"g"));
    }

    /// Clearing attributes must leave only the name behind.
    #[test]
    fn bytestart_clear_attributes() {
        let mut b = BytesStart::new("test");
        b.push_attribute(("x", "y\"z"));
        b.push_attribute(("x", "y\"z"));
        b.clear_attributes();
        assert!(b.attributes().next().is_none());
        // Only the 4-byte name is left
        assert_eq!(b.len(), 4);
        assert_eq!(b.name(), QName(b"test"));
    }
}