quick_xml/events/mod.rs
1//! Defines zero-copy XML events used throughout this library.
2//!
//! An XML event often represents part of an XML element.
4//! They occur both during reading and writing and are
5//! usually used with the stream-oriented API.
6//!
7//! For example, the XML element
8//! ```xml
9//! <name attr="value">Inner text</name>
10//! ```
11//! consists of the three events `Start`, `Text` and `End`.
12//! They can also represent other parts in an XML document like the
13//! XML declaration. Each Event usually contains further information,
14//! like the tag name, the attribute or the inner text.
15//!
16//! See [`Event`] for a list of all possible events.
17//!
18//! # Reading
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
20//! and [`Reader::read_event_into`]. You must listen
21//! for the different types of events you are interested in.
22//!
23//! See [`Reader`] for further information.
24//!
25//! # Writing
26//! When writing the XML document, you must create the XML element
27//! by constructing the events it consists of and pass them to the writer
28//! sequentially.
29//!
30//! See [`Writer`] for further information.
31//!
32//! [`Reader::read_event`]: crate::reader::Reader::read_event
33//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
34//! [`Reader`]: crate::reader::Reader
35//! [`Writer`]: crate::writer::Writer
36//! [`Event`]: crate::events::Event
37
38pub mod attributes;
39
40#[cfg(feature = "encoding")]
41use encoding_rs::Encoding;
42use std::borrow::Cow;
43use std::fmt::{self, Debug, Formatter};
44use std::iter::FusedIterator;
45use std::mem::replace;
46use std::ops::Deref;
47use std::str::from_utf8;
48
49use crate::encoding::{Decoder, EncodingError};
50use crate::errors::{Error, IllFormedError};
51use crate::escape::{
52 escape, minimal_escape, partial_escape, resolve_predefined_entity, unescape_with,
53};
54use crate::name::{LocalName, QName};
55#[cfg(feature = "serialize")]
56use crate::utils::CowRef;
57use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string, Bytes};
58use attributes::{AttrError, Attribute, Attributes};
59
60/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
61///
62/// The name can be accessed using the [`name`] or [`local_name`] methods.
63/// An iterator over the attributes is returned by the [`attributes`] method.
64///
65/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
66/// returns the content of this event between `<` and `>` or `/>`:
67///
68/// ```
69/// # use quick_xml::events::{BytesStart, Event};
70/// # use quick_xml::reader::Reader;
71/// # use pretty_assertions::assert_eq;
72/// // Remember, that \ at the end of string literal strips
73/// // all space characters to the first non-space character
74/// let mut reader = Reader::from_str("\
75/// <element a1 = 'val1' a2=\"val2\" />\
76/// <element a1 = 'val1' a2=\"val2\" >"
77/// );
78/// let content = "element a1 = 'val1' a2=\"val2\" ";
79/// let event = BytesStart::from_content(content, 7);
80///
81/// assert_eq!(reader.read_event().unwrap(), Event::Empty(event.borrow()));
82/// assert_eq!(reader.read_event().unwrap(), Event::Start(event.borrow()));
83/// // deref coercion of &BytesStart to &[u8]
84/// assert_eq!(&event as &[u8], content.as_bytes());
85/// // AsRef<[u8]> for &T + deref coercion
86/// assert_eq!(event.as_ref(), content.as_bytes());
87/// ```
88///
89/// [`name`]: Self::name
90/// [`local_name`]: Self::local_name
91/// [`attributes`]: Self::attributes
#[derive(Clone, Eq, PartialEq)]
pub struct BytesStart<'a> {
    /// Content of the element (the name followed by any attributes), before
    /// any UTF-8 conversion
    pub(crate) buf: Cow<'a, [u8]>,
    /// End of the element name; the name starts at the start of `buf`
    pub(crate) name_len: usize,
}
99
100impl<'a> BytesStart<'a> {
101 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
102 #[inline]
103 pub(crate) const fn wrap(content: &'a [u8], name_len: usize) -> Self {
104 BytesStart {
105 buf: Cow::Borrowed(content),
106 name_len,
107 }
108 }
109
110 /// Creates a new `BytesStart` from the given name.
111 ///
112 /// # Warning
113 ///
114 /// `name` must be a valid name.
115 #[inline]
116 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
117 let buf = str_cow_to_bytes(name);
118 BytesStart {
119 name_len: buf.len(),
120 buf,
121 }
122 }
123
124 /// Creates a new `BytesStart` from the given content (name + attributes).
125 ///
126 /// # Warning
127 ///
128 /// `&content[..name_len]` must be a valid name, and the remainder of `content`
129 /// must be correctly-formed attributes. Neither are checked, it is possible
130 /// to generate invalid XML if `content` or `name_len` are incorrect.
131 #[inline]
132 pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
133 BytesStart {
134 buf: str_cow_to_bytes(content),
135 name_len,
136 }
137 }
138
139 /// Converts the event into an owned event.
140 pub fn into_owned(self) -> BytesStart<'static> {
141 BytesStart {
142 buf: Cow::Owned(self.buf.into_owned()),
143 name_len: self.name_len,
144 }
145 }
146
147 /// Converts the event into an owned event without taking ownership of Event
148 pub fn to_owned(&self) -> BytesStart<'static> {
149 BytesStart {
150 buf: Cow::Owned(self.buf.clone().into_owned()),
151 name_len: self.name_len,
152 }
153 }
154
155 /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
156 ///
157 /// # Example
158 ///
159 /// ```
160 /// use quick_xml::events::{BytesStart, Event};
161 /// # use quick_xml::writer::Writer;
162 /// # use quick_xml::Error;
163 ///
164 /// struct SomeStruct<'a> {
165 /// attrs: BytesStart<'a>,
166 /// // ...
167 /// }
168 /// # impl<'a> SomeStruct<'a> {
169 /// # fn example(&self) -> Result<(), Error> {
170 /// # let mut writer = Writer::new(Vec::new());
171 ///
172 /// writer.write_event(Event::Start(self.attrs.borrow()))?;
173 /// // ...
174 /// writer.write_event(Event::End(self.attrs.to_end()))?;
175 /// # Ok(())
176 /// # }}
177 /// ```
178 ///
179 /// [`to_end`]: Self::to_end
180 pub fn borrow(&self) -> BytesStart {
181 BytesStart {
182 buf: Cow::Borrowed(&self.buf),
183 name_len: self.name_len,
184 }
185 }
186
187 /// Creates new paired close tag
188 #[inline]
189 pub fn to_end(&self) -> BytesEnd {
190 BytesEnd::from(self.name())
191 }
192
193 /// Gets the undecoded raw tag name, as present in the input stream.
194 #[inline]
195 pub fn name(&self) -> QName {
196 QName(&self.buf[..self.name_len])
197 }
198
199 /// Gets the undecoded raw local tag name (excluding namespace) as present
200 /// in the input stream.
201 ///
202 /// All content up to and including the first `:` character is removed from the tag name.
203 #[inline]
204 pub fn local_name(&self) -> LocalName {
205 self.name().into()
206 }
207
208 /// Edit the name of the BytesStart in-place
209 ///
210 /// # Warning
211 ///
212 /// `name` must be a valid name.
213 pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
214 let bytes = self.buf.to_mut();
215 bytes.splice(..self.name_len, name.iter().cloned());
216 self.name_len = name.len();
217 self
218 }
219
220 /// Gets the undecoded raw tag name, as present in the input stream, which
221 /// is borrowed either to the input, or to the event.
222 ///
223 /// # Lifetimes
224 ///
225 /// - `'a`: Lifetime of the input data from which this event is borrow
226 /// - `'e`: Lifetime of the concrete event instance
227 // TODO: We should made this is a part of public API, but with safe wrapped for a name
228 #[cfg(feature = "serialize")]
229 pub(crate) fn raw_name<'e>(&'e self) -> CowRef<'a, 'e, [u8]> {
230 match self.buf {
231 Cow::Borrowed(b) => CowRef::Input(&b[..self.name_len]),
232 Cow::Owned(ref o) => CowRef::Slice(&o[..self.name_len]),
233 }
234 }
235}
236
237/// Attribute-related methods
238impl<'a> BytesStart<'a> {
239 /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
240 ///
241 /// The yielded items must be convertible to [`Attribute`] using `Into`.
242 pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
243 where
244 I: IntoIterator,
245 I::Item: Into<Attribute<'b>>,
246 {
247 self.extend_attributes(attributes);
248 self
249 }
250
251 /// Add additional attributes to this tag using an iterator.
252 ///
253 /// The yielded items must be convertible to [`Attribute`] using `Into`.
254 pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
255 where
256 I: IntoIterator,
257 I::Item: Into<Attribute<'b>>,
258 {
259 for attr in attributes {
260 self.push_attribute(attr);
261 }
262 self
263 }
264
265 /// Adds an attribute to this element.
266 pub fn push_attribute<'b, A>(&mut self, attr: A)
267 where
268 A: Into<Attribute<'b>>,
269 {
270 self.buf.to_mut().push(b' ');
271 self.push_attr(attr.into());
272 }
273
274 /// Remove all attributes from the ByteStart
275 pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
276 self.buf.to_mut().truncate(self.name_len);
277 self
278 }
279
280 /// Returns an iterator over the attributes of this tag.
281 pub fn attributes(&self) -> Attributes {
282 Attributes::wrap(&self.buf, self.name_len, false)
283 }
284
285 /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
286 pub fn html_attributes(&self) -> Attributes {
287 Attributes::wrap(&self.buf, self.name_len, true)
288 }
289
290 /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
291 /// including the whitespace after the tag name if there is any.
292 #[inline]
293 pub fn attributes_raw(&self) -> &[u8] {
294 &self.buf[self.name_len..]
295 }
296
297 /// Try to get an attribute
298 pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
299 &'a self,
300 attr_name: N,
301 ) -> Result<Option<Attribute<'a>>, AttrError> {
302 for a in self.attributes().with_checks(false) {
303 let a = a?;
304 if a.key.as_ref() == attr_name.as_ref() {
305 return Ok(Some(a));
306 }
307 }
308 Ok(None)
309 }
310
311 /// Adds an attribute to this element.
312 pub(crate) fn push_attr<'b>(&mut self, attr: Attribute<'b>) {
313 let bytes = self.buf.to_mut();
314 bytes.extend_from_slice(attr.key.as_ref());
315 bytes.extend_from_slice(b"=\"");
316 // FIXME: need to escape attribute content
317 bytes.extend_from_slice(attr.value.as_ref());
318 bytes.push(b'"');
319 }
320
321 /// Adds new line in existing element
322 pub(crate) fn push_newline(&mut self) {
323 self.buf.to_mut().push(b'\n');
324 }
325
326 /// Adds indentation bytes in existing element
327 pub(crate) fn push_indent(&mut self, indent: &[u8]) {
328 self.buf.to_mut().extend_from_slice(indent);
329 }
330}
331
332impl<'a> Debug for BytesStart<'a> {
333 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
334 write!(f, "BytesStart {{ buf: ")?;
335 write_cow_string(f, &self.buf)?;
336 write!(f, ", name_len: {} }}", self.name_len)
337 }
338}
339
340impl<'a> Deref for BytesStart<'a> {
341 type Target = [u8];
342
343 fn deref(&self) -> &[u8] {
344 &self.buf
345 }
346}
347
348impl<'a> From<QName<'a>> for BytesStart<'a> {
349 #[inline]
350 fn from(name: QName<'a>) -> Self {
351 let name = name.into_inner();
352 Self::wrap(name, name.len())
353 }
354}
355
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
    /// Generates a start tag with a non-empty, purely alphanumeric name and an
    /// arbitrary list of attributes. Any other name is rejected with
    /// `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let s = <&str>::arbitrary(u)?;
        if s.is_empty() || !s.chars().all(char::is_alphanumeric) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        let mut result = Self::new(s);
        // A `Vec` is already `IntoIterator`, no explicit `.into_iter()` needed
        result.extend_attributes(Vec::<(&str, &str)>::arbitrary(u)?);
        Ok(result)
    }

    /// Uses the size hint of the `&str` the name is generated from.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
372////////////////////////////////////////////////////////////////////////////////////////////////////
373
374/// Closing tag data (`Event::End`): `</name>`.
375///
376/// The name can be accessed using the [`name`] or [`local_name`] methods.
377///
378/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
379/// returns the content of this event between `</` and `>`.
380///
381/// Note, that inner text will not contain `>` character inside:
382///
383/// ```
384/// # use quick_xml::events::{BytesEnd, Event};
385/// # use quick_xml::reader::Reader;
386/// # use pretty_assertions::assert_eq;
387/// let mut reader = Reader::from_str(r#"<element></element a1 = 'val1' a2="val2" >"#);
388/// // Note, that this entire string considered as a .name()
389/// let content = "element a1 = 'val1' a2=\"val2\" ";
390/// let event = BytesEnd::new(content);
391///
392/// reader.config_mut().trim_markup_names_in_closing_tags = false;
393/// reader.config_mut().check_end_names = false;
394/// reader.read_event().unwrap(); // Skip `<element>`
395///
396/// assert_eq!(reader.read_event().unwrap(), Event::End(event.borrow()));
397/// assert_eq!(event.name().as_ref(), content.as_bytes());
398/// // deref coercion of &BytesEnd to &[u8]
399/// assert_eq!(&event as &[u8], content.as_bytes());
400/// // AsRef<[u8]> for &T + deref coercion
401/// assert_eq!(event.as_ref(), content.as_bytes());
402/// ```
403///
404/// [`name`]: Self::name
405/// [`local_name`]: Self::local_name
#[derive(Clone, Eq, PartialEq)]
pub struct BytesEnd<'a> {
    /// Raw, undecoded bytes of the closing tag name. This is the slice
    /// returned by `deref()` and wrapped by `name()`.
    name: Cow<'a, [u8]>,
}
410
411impl<'a> BytesEnd<'a> {
412 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
413 #[inline]
414 pub(crate) const fn wrap(name: Cow<'a, [u8]>) -> Self {
415 BytesEnd { name }
416 }
417
418 /// Creates a new `BytesEnd` borrowing a slice.
419 ///
420 /// # Warning
421 ///
422 /// `name` must be a valid name.
423 #[inline]
424 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
425 Self::wrap(str_cow_to_bytes(name))
426 }
427
428 /// Converts the event into an owned event.
429 pub fn into_owned(self) -> BytesEnd<'static> {
430 BytesEnd {
431 name: Cow::Owned(self.name.into_owned()),
432 }
433 }
434
435 /// Converts the event into a borrowed event.
436 #[inline]
437 pub fn borrow(&self) -> BytesEnd {
438 BytesEnd {
439 name: Cow::Borrowed(&self.name),
440 }
441 }
442
443 /// Gets the undecoded raw tag name, as present in the input stream.
444 #[inline]
445 pub fn name(&self) -> QName {
446 QName(&self.name)
447 }
448
449 /// Gets the undecoded raw local tag name (excluding namespace) as present
450 /// in the input stream.
451 ///
452 /// All content up to and including the first `:` character is removed from the tag name.
453 #[inline]
454 pub fn local_name(&self) -> LocalName {
455 self.name().into()
456 }
457}
458
459impl<'a> Debug for BytesEnd<'a> {
460 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
461 write!(f, "BytesEnd {{ name: ")?;
462 write_cow_string(f, &self.name)?;
463 write!(f, " }}")
464 }
465}
466
467impl<'a> Deref for BytesEnd<'a> {
468 type Target = [u8];
469
470 fn deref(&self) -> &[u8] {
471 &self.name
472 }
473}
474
475impl<'a> From<QName<'a>> for BytesEnd<'a> {
476 #[inline]
477 fn from(name: QName<'a>) -> Self {
478 Self::wrap(name.into_inner().into())
479 }
480}
481
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    /// Generates a closing tag whose name is an arbitrary string (not validated).
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    /// Uses the size hint of the `&str` the name is generated from.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
491
492////////////////////////////////////////////////////////////////////////////////////////////////////
493
/// Data from various events (most notably, `Event::Text`) that is stored in XML
/// in escaped form. Internally, the data is also kept in escaped form.
496///
497/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
498/// returns the content of this event. In case of comment this is everything
499/// between `<!--` and `-->` and the text of comment will not contain `-->` inside.
500/// In case of DTD this is everything between `<!DOCTYPE` + spaces and closing `>`
501/// (i.e. in case of DTD the first character is never space):
502///
503/// ```
504/// # use quick_xml::events::{BytesText, Event};
505/// # use quick_xml::reader::Reader;
506/// # use pretty_assertions::assert_eq;
507/// // Remember, that \ at the end of string literal strips
508/// // all space characters to the first non-space character
509/// let mut reader = Reader::from_str("\
510/// <!DOCTYPE comment or text >\
511/// comment or text \
512/// <!--comment or text -->"
513/// );
514/// let content = "comment or text ";
515/// let event = BytesText::new(content);
516///
517/// assert_eq!(reader.read_event().unwrap(), Event::DocType(event.borrow()));
518/// assert_eq!(reader.read_event().unwrap(), Event::Text(event.borrow()));
519/// assert_eq!(reader.read_event().unwrap(), Event::Comment(event.borrow()));
520/// // deref coercion of &BytesText to &[u8]
521/// assert_eq!(&event as &[u8], content.as_bytes());
522/// // AsRef<[u8]> for &T + deref coercion
523/// assert_eq!(event.as_ref(), content.as_bytes());
524/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesText<'a> {
    /// Escaped then encoded content of the event. Content is encoded in the XML
    /// document encoding when the event comes from the reader and should be in
    /// the document encoding when the event is passed to the writer
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
534
535impl<'a> BytesText<'a> {
536 /// Creates a new `BytesText` from an escaped byte sequence in the specified encoding.
537 #[inline]
538 pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
539 Self {
540 content: content.into(),
541 decoder,
542 }
543 }
544
545 /// Creates a new `BytesText` from an escaped string.
546 #[inline]
547 pub fn from_escaped<C: Into<Cow<'a, str>>>(content: C) -> Self {
548 Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
549 }
550
551 /// Creates a new `BytesText` from a string. The string is expected not to
552 /// be escaped.
553 #[inline]
554 pub fn new(content: &'a str) -> Self {
555 Self::from_escaped(escape(content))
556 }
557
558 /// Ensures that all data is owned to extend the object's lifetime if
559 /// necessary.
560 #[inline]
561 pub fn into_owned(self) -> BytesText<'static> {
562 BytesText {
563 content: self.content.into_owned().into(),
564 decoder: self.decoder,
565 }
566 }
567
568 /// Extracts the inner `Cow` from the `BytesText` event container.
569 #[inline]
570 pub fn into_inner(self) -> Cow<'a, [u8]> {
571 self.content
572 }
573
574 /// Converts the event into a borrowed event.
575 #[inline]
576 pub fn borrow(&self) -> BytesText {
577 BytesText {
578 content: Cow::Borrowed(&self.content),
579 decoder: self.decoder,
580 }
581 }
582
583 /// Decodes then unescapes the content of the event.
584 ///
585 /// This will allocate if the value contains any escape sequences or in
586 /// non-UTF-8 encoding.
587 pub fn unescape(&self) -> Result<Cow<'a, str>, Error> {
588 self.unescape_with(resolve_predefined_entity)
589 }
590
591 /// Decodes then unescapes the content of the event with custom entities.
592 ///
593 /// This will allocate if the value contains any escape sequences or in
594 /// non-UTF-8 encoding.
595 pub fn unescape_with<'entity>(
596 &self,
597 resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
598 ) -> Result<Cow<'a, str>, Error> {
599 let decoded = self.decoder.decode_cow(&self.content)?;
600
601 match unescape_with(&decoded, resolve_entity)? {
602 // Because result is borrowed, no replacements was done and we can use original string
603 Cow::Borrowed(_) => Ok(decoded),
604 Cow::Owned(s) => Ok(s.into()),
605 }
606 }
607
608 /// Removes leading XML whitespace bytes from text content.
609 ///
610 /// Returns `true` if content is empty after that
611 pub fn inplace_trim_start(&mut self) -> bool {
612 self.content = trim_cow(
613 replace(&mut self.content, Cow::Borrowed(b"")),
614 trim_xml_start,
615 );
616 self.content.is_empty()
617 }
618
619 /// Removes trailing XML whitespace bytes from text content.
620 ///
621 /// Returns `true` if content is empty after that
622 pub fn inplace_trim_end(&mut self) -> bool {
623 self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
624 self.content.is_empty()
625 }
626}
627
628impl<'a> Debug for BytesText<'a> {
629 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
630 write!(f, "BytesText {{ content: ")?;
631 write_cow_string(f, &self.content)?;
632 write!(f, " }}")
633 }
634}
635
636impl<'a> Deref for BytesText<'a> {
637 type Target = [u8];
638
639 fn deref(&self) -> &[u8] {
640 &self.content
641 }
642}
643
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
    /// Generates a text event from a purely alphanumeric (possibly empty)
    /// string. Any other string is rejected with `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let s = <&str>::arbitrary(u)?;
        if !s.chars().all(char::is_alphanumeric) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        Ok(Self::new(s))
    }

    /// Uses the size hint of the `&str` the text is generated from.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
658
659////////////////////////////////////////////////////////////////////////////////////////////////////
660
661/// CDATA content contains unescaped data from the reader. If you want to write them as a text,
662/// [convert](Self::escape) it to [`BytesText`].
663///
664/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
665/// returns the content of this event between `<![CDATA[` and `]]>`.
666///
667/// Note, that inner text will not contain `]]>` sequence inside:
668///
669/// ```
670/// # use quick_xml::events::{BytesCData, Event};
671/// # use quick_xml::reader::Reader;
672/// # use pretty_assertions::assert_eq;
673/// let mut reader = Reader::from_str("<![CDATA[ CDATA section ]]>");
674/// let content = " CDATA section ";
675/// let event = BytesCData::new(content);
676///
677/// assert_eq!(reader.read_event().unwrap(), Event::CData(event.borrow()));
678/// // deref coercion of &BytesCData to &[u8]
679/// assert_eq!(&event as &[u8], content.as_bytes());
680/// // AsRef<[u8]> for &T + deref coercion
681/// assert_eq!(event.as_ref(), content.as_bytes());
682/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesCData<'a> {
    /// Raw content of the CDATA section, stored in the encoding described by
    /// `decoder`
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
689
690impl<'a> BytesCData<'a> {
691 /// Creates a new `BytesCData` from a byte sequence in the specified encoding.
692 #[inline]
693 pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
694 Self {
695 content: content.into(),
696 decoder,
697 }
698 }
699
700 /// Creates a new `BytesCData` from a string.
701 ///
702 /// # Warning
703 ///
704 /// `content` must not contain the `]]>` sequence. You can use
705 /// [`BytesCData::escaped`] to escape the content instead.
706 #[inline]
707 pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
708 Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
709 }
710
711 /// Creates an iterator of `BytesCData` from a string.
712 ///
713 /// If a string contains `]]>`, it needs to be split into multiple `CDATA`
714 /// sections, splitting the `]]` and `>` characters, because the CDATA closing
715 /// sequence cannot be escaped. This iterator yields a `BytesCData` instance
716 /// for each of those sections.
717 ///
718 /// # Examples
719 ///
720 /// ```
721 /// # use quick_xml::events::BytesCData;
722 /// # use pretty_assertions::assert_eq;
723 /// let content = "";
724 /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
725 /// assert_eq!(cdata, &[BytesCData::new("")]);
726 ///
727 /// let content = "Certain tokens like ]]> can be difficult and <invalid>";
728 /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
729 /// assert_eq!(cdata, &[
730 /// BytesCData::new("Certain tokens like ]]"),
731 /// BytesCData::new("> can be difficult and <invalid>"),
732 /// ]);
733 ///
734 /// let content = "foo]]>bar]]>baz]]>quux";
735 /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
736 /// assert_eq!(cdata, &[
737 /// BytesCData::new("foo]]"),
738 /// BytesCData::new(">bar]]"),
739 /// BytesCData::new(">baz]]"),
740 /// BytesCData::new(">quux"),
741 /// ]);
742 /// ```
743 #[inline]
744 pub fn escaped(content: &'a str) -> CDataIterator<'a> {
745 CDataIterator {
746 unprocessed: content.as_bytes(),
747 finished: false,
748 }
749 }
750
751 /// Ensures that all data is owned to extend the object's lifetime if
752 /// necessary.
753 #[inline]
754 pub fn into_owned(self) -> BytesCData<'static> {
755 BytesCData {
756 content: self.content.into_owned().into(),
757 decoder: self.decoder,
758 }
759 }
760
761 /// Extracts the inner `Cow` from the `BytesCData` event container.
762 #[inline]
763 pub fn into_inner(self) -> Cow<'a, [u8]> {
764 self.content
765 }
766
767 /// Converts the event into a borrowed event.
768 #[inline]
769 pub fn borrow(&self) -> BytesCData {
770 BytesCData {
771 content: Cow::Borrowed(&self.content),
772 decoder: self.decoder,
773 }
774 }
775
776 /// Converts this CDATA content to an escaped version, that can be written
777 /// as an usual text in XML.
778 ///
779 /// This function performs following replacements:
780 ///
781 /// | Character | Replacement
782 /// |-----------|------------
783 /// | `<` | `<`
784 /// | `>` | `>`
785 /// | `&` | `&`
786 /// | `'` | `'`
787 /// | `"` | `"`
788 pub fn escape(self) -> Result<BytesText<'a>, EncodingError> {
789 let decoded = self.decode()?;
790 Ok(BytesText::wrap(
791 match escape(decoded) {
792 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
793 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
794 },
795 Decoder::utf8(),
796 ))
797 }
798
799 /// Converts this CDATA content to an escaped version, that can be written
800 /// as an usual text in XML.
801 ///
802 /// In XML text content, it is allowed (though not recommended) to leave
803 /// the quote special characters `"` and `'` unescaped.
804 ///
805 /// This function performs following replacements:
806 ///
807 /// | Character | Replacement
808 /// |-----------|------------
809 /// | `<` | `<`
810 /// | `>` | `>`
811 /// | `&` | `&`
812 pub fn partial_escape(self) -> Result<BytesText<'a>, EncodingError> {
813 let decoded = self.decode()?;
814 Ok(BytesText::wrap(
815 match partial_escape(decoded) {
816 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
817 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
818 },
819 Decoder::utf8(),
820 ))
821 }
822
823 /// Converts this CDATA content to an escaped version, that can be written
824 /// as an usual text in XML. This method escapes only those characters that
825 /// must be escaped according to the [specification].
826 ///
827 /// This function performs following replacements:
828 ///
829 /// | Character | Replacement
830 /// |-----------|------------
831 /// | `<` | `<`
832 /// | `&` | `&`
833 ///
834 /// [specification]: https://www.w3.org/TR/xml11/#syntax
835 pub fn minimal_escape(self) -> Result<BytesText<'a>, EncodingError> {
836 let decoded = self.decode()?;
837 Ok(BytesText::wrap(
838 match minimal_escape(decoded) {
839 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
840 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
841 },
842 Decoder::utf8(),
843 ))
844 }
845
846 /// Gets content of this text buffer in the specified encoding
847 pub(crate) fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
848 Ok(self.decoder.decode_cow(&self.content)?)
849 }
850}
851
852impl<'a> Debug for BytesCData<'a> {
853 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
854 write!(f, "BytesCData {{ content: ")?;
855 write_cow_string(f, &self.content)?;
856 write!(f, " }}")
857 }
858}
859
860impl<'a> Deref for BytesCData<'a> {
861 type Target = [u8];
862
863 fn deref(&self) -> &[u8] {
864 &self.content
865 }
866}
867
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    /// Generates a CDATA event from an arbitrary string (not validated).
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    /// Uses the size hint of the `&str` the content is generated from.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
877
878/// Iterator over `CDATA` sections in a string.
879///
880/// This iterator is created by the [`BytesCData::escaped`] method.
#[derive(Clone)]
pub struct CDataIterator<'a> {
    /// The unprocessed data which should be emitted as `BytesCData` events.
    /// At each iteration, the processed data is cut from this slice.
    unprocessed: &'a [u8],
    /// Set once the last section has been emitted; after that `next()`
    /// returns `None`.
    finished: bool,
}
888
889impl<'a> Debug for CDataIterator<'a> {
890 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
891 f.debug_struct("CDataIterator")
892 .field("unprocessed", &Bytes(self.unprocessed))
893 .field("finished", &self.finished)
894 .finish()
895 }
896}
897
898impl<'a> Iterator for CDataIterator<'a> {
899 type Item = BytesCData<'a>;
900
901 fn next(&mut self) -> Option<BytesCData<'a>> {
902 if self.finished {
903 return None;
904 }
905
906 for gt in memchr::memchr_iter(b'>', self.unprocessed) {
907 if self.unprocessed[..gt].ends_with(b"]]") {
908 let (slice, rest) = self.unprocessed.split_at(gt);
909 self.unprocessed = rest;
910 return Some(BytesCData::wrap(slice, Decoder::utf8()));
911 }
912 }
913
914 self.finished = true;
915 Some(BytesCData::wrap(self.unprocessed, Decoder::utf8()))
916 }
917}
918
// Once `finished` is set, `next()` keeps returning `None`, so the iterator is fused.
impl FusedIterator for CDataIterator<'_> {}
920
921////////////////////////////////////////////////////////////////////////////////////////////////////
922
923/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
924///
925/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
926/// returns the content of this event between `<?` and `?>`.
927///
928/// Note, that inner text will not contain `?>` sequence inside:
929///
930/// ```
931/// # use quick_xml::events::{BytesPI, Event};
932/// # use quick_xml::reader::Reader;
933/// # use pretty_assertions::assert_eq;
934/// let mut reader = Reader::from_str("<?processing instruction >:-<~ ?>");
935/// let content = "processing instruction >:-<~ ";
936/// let event = BytesPI::new(content);
937///
938/// assert_eq!(reader.read_event().unwrap(), Event::PI(event.borrow()));
939/// // deref coercion of &BytesPI to &[u8]
940/// assert_eq!(&event as &[u8], content.as_bytes());
941/// // AsRef<[u8]> for &T + deref coercion
942/// assert_eq!(event.as_ref(), content.as_bytes());
943/// ```
944///
945/// [PI]: https://www.w3.org/TR/xml11/#sec-pi
#[derive(Clone, Eq, PartialEq)]
pub struct BytesPI<'a> {
    /// Raw bytes of the instruction. The `name_len` of the inner `BytesStart`
    /// marks the end of the target, which is what `target()` returns.
    content: BytesStart<'a>,
}
950
951impl<'a> BytesPI<'a> {
952 /// Creates a new `BytesPI` from a byte sequence in the specified encoding.
953 #[inline]
954 pub(crate) const fn wrap(content: &'a [u8], target_len: usize) -> Self {
955 Self {
956 content: BytesStart::wrap(content, target_len),
957 }
958 }
959
960 /// Creates a new `BytesPI` from a string.
961 ///
962 /// # Warning
963 ///
964 /// `content` must not contain the `?>` sequence.
965 #[inline]
966 pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
967 let buf = str_cow_to_bytes(content);
968 let name_len = name_len(&buf);
969 Self {
970 content: BytesStart { buf, name_len },
971 }
972 }
973
974 /// Ensures that all data is owned to extend the object's lifetime if
975 /// necessary.
976 #[inline]
977 pub fn into_owned(self) -> BytesPI<'static> {
978 BytesPI {
979 content: self.content.into_owned().into(),
980 }
981 }
982
983 /// Extracts the inner `Cow` from the `BytesPI` event container.
984 #[inline]
985 pub fn into_inner(self) -> Cow<'a, [u8]> {
986 self.content.buf
987 }
988
989 /// Converts the event into a borrowed event.
990 #[inline]
991 pub fn borrow(&self) -> BytesPI {
992 BytesPI {
993 content: self.content.borrow(),
994 }
995 }
996
997 /// A target used to identify the application to which the instruction is directed.
998 ///
999 /// # Example
1000 ///
1001 /// ```
1002 /// # use pretty_assertions::assert_eq;
1003 /// use quick_xml::events::BytesPI;
1004 ///
1005 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1006 /// assert_eq!(instruction.target(), b"xml-stylesheet");
1007 /// ```
1008 #[inline]
1009 pub fn target(&self) -> &[u8] {
1010 self.content.name().0
1011 }
1012
1013 /// Content of the processing instruction. Contains everything between target
1014 /// name and the end of the instruction. A direct consequence is that the first
1015 /// character is always a space character.
1016 ///
1017 /// # Example
1018 ///
1019 /// ```
1020 /// # use pretty_assertions::assert_eq;
1021 /// use quick_xml::events::BytesPI;
1022 ///
1023 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1024 /// assert_eq!(instruction.content(), br#" href="style.css""#);
1025 /// ```
1026 #[inline]
1027 pub fn content(&self) -> &[u8] {
1028 self.content.attributes_raw()
1029 }
1030
1031 /// A view of the processing instructions' content as a list of key-value pairs.
1032 ///
1033 /// Key-value pairs are used in some processing instructions, for example in
1034 /// `<?xml-stylesheet?>`.
1035 ///
1036 /// Returned iterator does not validate attribute values as may required by
1037 /// target's rules. For example, it doesn't check that substring `?>` is not
1038 /// present in the attribute value. That shouldn't be the problem when event
1039 /// is produced by the reader, because reader detects end of processing instruction
1040 /// by the first `?>` sequence, as required by the specification, and therefore
1041 /// this sequence cannot appear inside it.
1042 ///
1043 /// # Example
1044 ///
1045 /// ```
1046 /// # use pretty_assertions::assert_eq;
1047 /// use std::borrow::Cow;
1048 /// use quick_xml::events::attributes::Attribute;
1049 /// use quick_xml::events::BytesPI;
1050 /// use quick_xml::name::QName;
1051 ///
1052 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1053 /// for attr in instruction.attributes() {
1054 /// assert_eq!(attr, Ok(Attribute {
1055 /// key: QName(b"href"),
1056 /// value: Cow::Borrowed(b"style.css"),
1057 /// }));
1058 /// }
1059 /// ```
1060 #[inline]
1061 pub fn attributes(&self) -> Attributes {
1062 self.content.attributes()
1063 }
1064}
1065
1066impl<'a> Debug for BytesPI<'a> {
1067 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1068 write!(f, "BytesPI {{ content: ")?;
1069 write_cow_string(f, &self.content.buf)?;
1070 write!(f, " }}")
1071 }
1072}
1073
1074impl<'a> Deref for BytesPI<'a> {
1075 type Target = [u8];
1076
1077 fn deref(&self) -> &[u8] {
1078 &self.content
1079 }
1080}
1081
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> {
    /// Builds a processing instruction from one arbitrary string.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let content = <&str>::arbitrary(u)?;
        Ok(Self::new(content))
    }

    /// Same hint as for the single `&str` consumed by `arbitrary`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1091
1092////////////////////////////////////////////////////////////////////////////////////////////////////
1093
1094/// An XML declaration (`Event::Decl`).
1095///
1096/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
1097///
1098/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
1099/// returns the content of this event between `<?` and `?>`.
1100///
/// Note that the inner text never contains the `?>` sequence:
1102///
1103/// ```
1104/// # use quick_xml::events::{BytesDecl, BytesStart, Event};
1105/// # use quick_xml::reader::Reader;
1106/// # use pretty_assertions::assert_eq;
1107/// let mut reader = Reader::from_str("<?xml version = '1.0' ?>");
1108/// let content = "xml version = '1.0' ";
1109/// let event = BytesDecl::from_start(BytesStart::from_content(content, 3));
1110///
1111/// assert_eq!(reader.read_event().unwrap(), Event::Decl(event.borrow()));
1112/// // deref coercion of &BytesDecl to &[u8]
1113/// assert_eq!(&event as &[u8], content.as_bytes());
1114/// // AsRef<[u8]> for &T + deref coercion
1115/// assert_eq!(event.as_ref(), content.as_bytes());
1116/// ```
1117#[derive(Clone, Debug, Eq, PartialEq)]
1118pub struct BytesDecl<'a> {
1119 content: BytesStart<'a>,
1120}
1121
1122impl<'a> BytesDecl<'a> {
1123 /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
1124 /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
1125 /// attribute.
1126 ///
1127 /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
1128 /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
1129 /// the double quote character is not allowed in any of the attribute values.
1130 pub fn new(
1131 version: &str,
1132 encoding: Option<&str>,
1133 standalone: Option<&str>,
1134 ) -> BytesDecl<'static> {
1135 // Compute length of the buffer based on supplied attributes
1136 // ' encoding=""' => 12
1137 let encoding_attr_len = if let Some(xs) = encoding {
1138 12 + xs.len()
1139 } else {
1140 0
1141 };
1142 // ' standalone=""' => 14
1143 let standalone_attr_len = if let Some(xs) = standalone {
1144 14 + xs.len()
1145 } else {
1146 0
1147 };
1148 // 'xml version=""' => 14
1149 let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len);
1150
1151 buf.push_str("xml version=\"");
1152 buf.push_str(version);
1153
1154 if let Some(encoding_val) = encoding {
1155 buf.push_str("\" encoding=\"");
1156 buf.push_str(encoding_val);
1157 }
1158
1159 if let Some(standalone_val) = standalone {
1160 buf.push_str("\" standalone=\"");
1161 buf.push_str(standalone_val);
1162 }
1163 buf.push('"');
1164
1165 BytesDecl {
1166 content: BytesStart::from_content(buf, 3),
1167 }
1168 }
1169
1170 /// Creates a `BytesDecl` from a `BytesStart`
1171 pub const fn from_start(start: BytesStart<'a>) -> Self {
1172 Self { content: start }
1173 }
1174
1175 /// Gets xml version, excluding quotes (`'` or `"`).
1176 ///
1177 /// According to the [grammar], the version *must* be the first thing in the declaration.
1178 /// This method tries to extract the first thing in the declaration and return it.
1179 /// In case of multiple attributes value of the first one is returned.
1180 ///
1181 /// If version is missed in the declaration, or the first thing is not a version,
1182 /// [`IllFormedError::MissingDeclVersion`] will be returned.
1183 ///
1184 /// # Examples
1185 ///
1186 /// ```
1187 /// use quick_xml::errors::{Error, IllFormedError};
1188 /// use quick_xml::events::{BytesDecl, BytesStart};
1189 ///
1190 /// // <?xml version='1.1'?>
1191 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1192 /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref());
1193 ///
1194 /// // <?xml version='1.0' version='1.1'?>
1195 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
1196 /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref());
1197 ///
1198 /// // <?xml encoding='utf-8'?>
1199 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1200 /// match decl.version() {
1201 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1202 /// _ => assert!(false),
1203 /// }
1204 ///
1205 /// // <?xml encoding='utf-8' version='1.1'?>
1206 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
1207 /// match decl.version() {
1208 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1209 /// _ => assert!(false),
1210 /// }
1211 ///
1212 /// // <?xml?>
1213 /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
1214 /// match decl.version() {
1215 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
1216 /// _ => assert!(false),
1217 /// }
1218 /// ```
1219 ///
1220 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1221 pub fn version(&self) -> Result<Cow<[u8]>, Error> {
1222 // The version *must* be the first thing in the declaration.
1223 match self.content.attributes().with_checks(false).next() {
1224 Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
1225 // first attribute was not "version"
1226 Some(Ok(a)) => {
1227 let found = from_utf8(a.key.as_ref())
1228 .map_err(|_| IllFormedError::MissingDeclVersion(None))?
1229 .to_string();
1230 Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(
1231 found,
1232 ))))
1233 }
1234 // error parsing attributes
1235 Some(Err(e)) => Err(e.into()),
1236 // no attributes
1237 None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))),
1238 }
1239 }
1240
1241 /// Gets xml encoding, excluding quotes (`'` or `"`).
1242 ///
1243 /// Although according to the [grammar] encoding must appear before `"standalone"`
1244 /// and after `"version"`, this method does not check that. The first occurrence
1245 /// of the attribute will be returned even if there are several. Also, method does
1246 /// not restrict symbols that can forming the encoding, so the returned encoding
1247 /// name may not correspond to the grammar.
1248 ///
1249 /// # Examples
1250 ///
1251 /// ```
1252 /// use std::borrow::Cow;
1253 /// use quick_xml::Error;
1254 /// use quick_xml::events::{BytesDecl, BytesStart};
1255 ///
1256 /// // <?xml version='1.1'?>
1257 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1258 /// assert!(decl.encoding().is_none());
1259 ///
1260 /// // <?xml encoding='utf-8'?>
1261 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1262 /// match decl.encoding() {
1263 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"),
1264 /// _ => assert!(false),
1265 /// }
1266 ///
1267 /// // <?xml encoding='something_WRONG' encoding='utf-8'?>
1268 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0));
1269 /// match decl.encoding() {
1270 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"),
1271 /// _ => assert!(false),
1272 /// }
1273 /// ```
1274 ///
1275 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1276 pub fn encoding(&self) -> Option<Result<Cow<[u8]>, AttrError>> {
1277 self.content
1278 .try_get_attribute("encoding")
1279 .map(|a| a.map(|a| a.value))
1280 .transpose()
1281 }
1282
1283 /// Gets xml standalone, excluding quotes (`'` or `"`).
1284 ///
1285 /// Although according to the [grammar] standalone flag must appear after `"version"`
1286 /// and `"encoding"`, this method does not check that. The first occurrence of the
1287 /// attribute will be returned even if there are several. Also, method does not
1288 /// restrict symbols that can forming the value, so the returned flag name may not
1289 /// correspond to the grammar.
1290 ///
1291 /// # Examples
1292 ///
1293 /// ```
1294 /// use std::borrow::Cow;
1295 /// use quick_xml::Error;
1296 /// use quick_xml::events::{BytesDecl, BytesStart};
1297 ///
1298 /// // <?xml version='1.1'?>
1299 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1300 /// assert!(decl.standalone().is_none());
1301 ///
1302 /// // <?xml standalone='yes'?>
1303 /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0));
1304 /// match decl.standalone() {
1305 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"),
1306 /// _ => assert!(false),
1307 /// }
1308 ///
1309 /// // <?xml standalone='something_WRONG' encoding='utf-8'?>
1310 /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0));
1311 /// match decl.standalone() {
1312 /// Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"),
1313 /// _ => assert!(false),
1314 /// }
1315 /// ```
1316 ///
1317 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1318 pub fn standalone(&self) -> Option<Result<Cow<[u8]>, AttrError>> {
1319 self.content
1320 .try_get_attribute("standalone")
1321 .map(|a| a.map(|a| a.value))
1322 .transpose()
1323 }
1324
1325 /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
1326 /// algorithm.
1327 ///
1328 /// If encoding in not known, or `encoding` key was not found, returns `None`.
1329 /// In case of duplicated `encoding` key, encoding, corresponding to the first
1330 /// one, is returned.
1331 #[cfg(feature = "encoding")]
1332 pub fn encoder(&self) -> Option<&'static Encoding> {
1333 self.encoding()
1334 .and_then(|e| e.ok())
1335 .and_then(|e| Encoding::for_label(&e))
1336 }
1337
1338 /// Converts the event into an owned event.
1339 pub fn into_owned(self) -> BytesDecl<'static> {
1340 BytesDecl {
1341 content: self.content.into_owned(),
1342 }
1343 }
1344
1345 /// Converts the event into a borrowed event.
1346 #[inline]
1347 pub fn borrow(&self) -> BytesDecl {
1348 BytesDecl {
1349 content: self.content.borrow(),
1350 }
1351 }
1352}
1353
1354impl<'a> Deref for BytesDecl<'a> {
1355 type Target = [u8];
1356
1357 fn deref(&self) -> &[u8] {
1358 &self.content
1359 }
1360}
1361
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    /// Builds a declaration from an arbitrary version string and arbitrary
    /// optional encoding and standalone strings, drawn in that order.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let version = <&str>::arbitrary(u)?;
        let encoding = Option::<&str>::arbitrary(u)?;
        let standalone = Option::<&str>::arbitrary(u)?;
        Ok(Self::new(version, encoding, standalone))
    }

    /// Reports the hint of a single `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1376
1377////////////////////////////////////////////////////////////////////////////////////////////////////
1378
1379/// Event emitted by [`Reader::read_event_into`].
1380///
1381/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
1382#[derive(Clone, Debug, Eq, PartialEq)]
1383#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
1384pub enum Event<'a> {
1385 /// Start tag (with attributes) `<tag attr="value">`.
1386 Start(BytesStart<'a>),
1387 /// End tag `</tag>`.
1388 End(BytesEnd<'a>),
1389 /// Empty element tag (with attributes) `<tag attr="value" />`.
1390 Empty(BytesStart<'a>),
1391 /// Escaped character data between tags.
1392 Text(BytesText<'a>),
1393 /// Unescaped character data stored in `<![CDATA[...]]>`.
1394 CData(BytesCData<'a>),
1395 /// Comment `<!-- ... -->`.
1396 Comment(BytesText<'a>),
1397 /// XML declaration `<?xml ...?>`.
1398 Decl(BytesDecl<'a>),
1399 /// Processing instruction `<?...?>`.
1400 PI(BytesPI<'a>),
1401 /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
1402 DocType(BytesText<'a>),
1403 /// End of XML document.
1404 Eof,
1405}
1406
1407impl<'a> Event<'a> {
1408 /// Converts the event to an owned version, untied to the lifetime of
1409 /// buffer used when reading but incurring a new, separate allocation.
1410 pub fn into_owned(self) -> Event<'static> {
1411 match self {
1412 Event::Start(e) => Event::Start(e.into_owned()),
1413 Event::End(e) => Event::End(e.into_owned()),
1414 Event::Empty(e) => Event::Empty(e.into_owned()),
1415 Event::Text(e) => Event::Text(e.into_owned()),
1416 Event::Comment(e) => Event::Comment(e.into_owned()),
1417 Event::CData(e) => Event::CData(e.into_owned()),
1418 Event::Decl(e) => Event::Decl(e.into_owned()),
1419 Event::PI(e) => Event::PI(e.into_owned()),
1420 Event::DocType(e) => Event::DocType(e.into_owned()),
1421 Event::Eof => Event::Eof,
1422 }
1423 }
1424
1425 /// Converts the event into a borrowed event.
1426 #[inline]
1427 pub fn borrow(&self) -> Event {
1428 match self {
1429 Event::Start(e) => Event::Start(e.borrow()),
1430 Event::End(e) => Event::End(e.borrow()),
1431 Event::Empty(e) => Event::Empty(e.borrow()),
1432 Event::Text(e) => Event::Text(e.borrow()),
1433 Event::Comment(e) => Event::Comment(e.borrow()),
1434 Event::CData(e) => Event::CData(e.borrow()),
1435 Event::Decl(e) => Event::Decl(e.borrow()),
1436 Event::PI(e) => Event::PI(e.borrow()),
1437 Event::DocType(e) => Event::DocType(e.borrow()),
1438 Event::Eof => Event::Eof,
1439 }
1440 }
1441}
1442
1443impl<'a> Deref for Event<'a> {
1444 type Target = [u8];
1445
1446 fn deref(&self) -> &[u8] {
1447 match *self {
1448 Event::Start(ref e) | Event::Empty(ref e) => e,
1449 Event::End(ref e) => e,
1450 Event::Text(ref e) => e,
1451 Event::Decl(ref e) => e,
1452 Event::PI(ref e) => e,
1453 Event::CData(ref e) => e,
1454 Event::Comment(ref e) => e,
1455 Event::DocType(ref e) => e,
1456 Event::Eof => &[],
1457 }
1458 }
1459}
1460
1461impl<'a> AsRef<Event<'a>> for Event<'a> {
1462 fn as_ref(&self) -> &Event<'a> {
1463 self
1464 }
1465}
1466
1467////////////////////////////////////////////////////////////////////////////////////////////////////
1468
/// Reinterprets a string `Cow` as a byte `Cow`, keeping a borrowed input
/// borrowed and an owned input owned.
#[inline]
fn str_cow_to_bytes<'a, C>(content: C) -> Cow<'a, [u8]>
where
    C: Into<Cow<'a, str>>,
{
    let text: Cow<'a, str> = content.into();
    match text {
        Cow::Owned(string) => Cow::Owned(string.into_bytes()),
        Cow::Borrowed(slice) => Cow::Borrowed(slice.as_bytes()),
    }
}
1476
/// Applies `trim` to the bytes held by `value`.
///
/// A borrowed input stays borrowed and is simply narrowed to the trimmed
/// slice. An owned input stays owned: its vector is reused when `trim` did not
/// change the length, otherwise the trimmed part is copied into a new vector.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    let owned = match value {
        Cow::Borrowed(slice) => return Cow::Borrowed(trim(slice)),
        Cow::Owned(vec) => vec,
    };

    let kept = trim(&owned);
    if kept.len() == owned.len() {
        // Nothing was removed, so hand back the original allocation.
        Cow::Owned(owned)
    } else {
        Cow::Owned(kept.to_vec())
    }
}
1492
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A freshly created start tag consists of its name only.
    #[test]
    fn bytestart_create() {
        let start = BytesStart::new("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
    }

    /// Renaming a tag keeps the attributes that were already pushed.
    #[test]
    fn bytestart_set_name() {
        let mut start = BytesStart::new("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
        assert_eq!(start.attributes_raw(), b"");

        start.push_attribute(("x", "a"));
        assert_eq!(start.len(), 10);
        assert_eq!(start.attributes_raw(), b" x=\"a\"");

        start.set_name(b"g");
        assert_eq!(start.len(), 7);
        assert_eq!(start.name(), QName(b"g"));
    }

    /// Clearing the attributes leaves only the tag name behind.
    #[test]
    fn bytestart_clear_attributes() {
        let mut start = BytesStart::new("test");
        for _ in 0..2 {
            start.push_attribute(("x", "y\"z"));
        }
        start.clear_attributes();

        assert!(start.attributes().next().is_none());
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
    }
}