// Source: xml/reader/events.rs

//! Contains `XmlEvent` datatype, instances of which are emitted by the parser.

use crate::attribute::OwnedAttribute;
use crate::common::XmlVersion;
use crate::name::OwnedName;
use crate::namespace::Namespace;
use std::fmt;

/// An element of an XML input stream.
///
/// Values of this enum are emitted by `reader::EventReader`. Each variant corresponds to a
/// different kind of construct in an XML document.
///
/// Only `PartialEq` and `Clone` are derived; `Debug` is implemented by hand in this module.
#[derive(PartialEq, Clone)]
pub enum XmlEvent {
    /// Corresponds to the XML document declaration.
    ///
    /// This event is always emitted before any other event. It is emitted
    /// even if the actual declaration is not present in the document.
    StartDocument {
        /// XML version.
        ///
        /// If the XML declaration is not present, defaults to `Version10`.
        version: XmlVersion,

        /// XML document encoding.
        ///
        /// If the XML declaration is not present or does not contain an `encoding` attribute,
        /// defaults to `"UTF-8"`. This field is currently purely informational.
        encoding: String,

        /// XML standalone declaration.
        ///
        /// If the XML declaration is not present or does not contain a `standalone` attribute,
        /// defaults to `None`. This field is currently purely informational.
        standalone: Option<bool>,
    },

    /// Denotes the end of the document stream.
    ///
    /// This event is always emitted after any other event (except `Error`). After it
    /// is emitted for the first time, it will be emitted again on every subsequent attempt
    /// to pull an event.
    ///
    /// It has no writer counterpart: `as_writer_event` returns `None` for this variant.
    EndDocument,

    /// Denotes an XML processing instruction.
    ///
    /// This event contains a processing instruction target (`name`) and opaque `data`. It
    /// is up to the application to process them.
    ProcessingInstruction {
        /// Processing instruction target.
        name: String,

        /// Processing instruction content, if any.
        data: Option<String>,
    },

    /// Denotes the beginning of an XML element.
    ///
    /// This event is emitted after parsing opening tags or after parsing bodiless tags. In the
    /// latter case an `EndElement` event immediately follows.
    StartElement {
        /// Qualified name of the element.
        name: OwnedName,

        /// A list of attributes associated with the element.
        ///
        /// Currently attributes are not checked for duplicates (TODO)
        attributes: Vec<OwnedAttribute>,

        /// Contents of the namespace mapping at this point of the document.
        namespace: Namespace,
    },

    /// Denotes the end of an XML element.
    ///
    /// This event is emitted after parsing closing tags or after parsing bodiless tags. In the
    /// latter case it is emitted immediately after the corresponding `StartElement` event.
    EndElement {
        /// Qualified name of the element.
        name: OwnedName,
    },

    /// Denotes CDATA content.
    ///
    /// This event contains unparsed data. No unescaping will be performed.
    ///
    /// It is possible to configure a parser to emit a `Characters` event instead of `CData`. See
    /// `pull::ParserConfiguration` structure for more information.
    CData(String),

    /// Denotes a comment.
    ///
    /// It is possible to configure a parser to ignore comments, in which case this event is
    /// never emitted. See `pull::ParserConfiguration` structure for more information.
    Comment(String),

    /// Denotes character data outside of tags.
    ///
    /// Contents of this event will always be unescaped, so no entities like `&lt;` or `&amp;` or `&#123;`
    /// will appear in it.
    ///
    /// It is possible to configure a parser to trim leading and trailing whitespace for this event.
    /// See `pull::ParserConfiguration` structure for more information.
    Characters(String),

    /// Denotes a chunk of whitespace outside of tags.
    ///
    /// It is possible to configure a parser to emit a `Characters` event instead of `Whitespace`.
    /// See `pull::ParserConfiguration` structure for more information. When combined with whitespace
    /// trimming, it will eliminate standalone whitespace from the event stream completely.
    ///
    /// `as_writer_event` converts this variant into the writer's `Characters` event.
    Whitespace(String),
}

impl fmt::Debug for XmlEvent {
    #[cold]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            XmlEvent::StartDocument { ref version, ref encoding, standalone } =>
                write!(f, "StartDocument({}, {}, {:?})", version, *encoding, standalone),
            XmlEvent::EndDocument =>
                write!(f, "EndDocument"),
            XmlEvent::ProcessingInstruction { ref name, ref data } =>
                write!(f, "ProcessingInstruction({}{})", *name, match *data {
                    Some(ref data) => format!(", {data}"),
                    None       => String::new()
                }),
            XmlEvent::StartElement { ref name, ref attributes, namespace: Namespace(ref namespace) } =>
                write!(f, "StartElement({}, {:?}{})", name, namespace, if attributes.is_empty() {
                    String::new()
                } else {
                    let attributes: Vec<String> = attributes.iter().map(
                        |a| format!("{} -> {}", a.name, a.value)
                    ).collect();
                    format!(", [{}]", attributes.join(", "))
                }),
            XmlEvent::EndElement { ref name } =>
                write!(f, "EndElement({name})"),
            XmlEvent::Comment(ref data) =>
                write!(f, "Comment({data})"),
            XmlEvent::CData(ref data) =>
                write!(f, "CData({data})"),
            XmlEvent::Characters(ref data) =>
                write!(f, "Characters({data})"),
            XmlEvent::Whitespace(ref data) =>
                write!(f, "Whitespace({data})")
        }
    }
}

impl XmlEvent {
    /// Builds the writer-side event that corresponds to this reader event.
    ///
    /// The returned event borrows from `self`. Every variant has a writer counterpart except
    /// `EndDocument`, for which `None` is returned. `Whitespace` is mapped to the writer's
    /// `Characters` event, just like `Characters` itself.
    ///
    /// This is handy for streaming transformations where the output is also an XML document:
    /// some events can be processed while the rest are forwarded to the writer unchanged:
    ///
    /// ```rust
    /// use std::str;
    ///
    /// use xml::reader::XmlEvent as ReaderEvent;
    /// use xml::writer::XmlEvent as WriterEvent;
    /// use xml::{EventReader, EventWriter};
    ///
    /// let mut input: &[u8] = b"<hello>world</hello>";
    /// let mut output: Vec<u8> = Vec::new();
    ///
    /// {
    ///     let mut reader = EventReader::new(&mut input);
    ///     let mut writer = EventWriter::new(&mut output);
    ///
    ///     for e in reader {
    ///         match e.unwrap() {
    ///             ReaderEvent::Characters(s) =>
    ///                 writer.write(WriterEvent::characters(&s.to_uppercase())).unwrap(),
    ///             e => if let Some(e) = e.as_writer_event() {
    ///                 writer.write(e).unwrap()
    ///             }
    ///         }
    ///     }
    /// }
    ///
    /// assert_eq!(
    ///     str::from_utf8(&output).unwrap(),
    ///     r#"<?xml version="1.0" encoding="UTF-8"?><hello>WORLD</hello>"#
    /// );
    /// ```
    ///
    /// Note that this API may change or get additions in future to improve its ergonomics.
    #[must_use]
    pub fn as_writer_event(&self) -> Option<crate::writer::events::XmlEvent<'_>> {
        use crate::writer::events::XmlEvent as WriterEvent;

        let converted = match self {
            // The only reader event without a writer-side equivalent.
            Self::EndDocument => return None,
            Self::StartDocument { version, encoding, standalone } => WriterEvent::StartDocument {
                version: *version,
                encoding: Some(encoding),
                standalone: *standalone,
            },
            Self::ProcessingInstruction { name, data } => WriterEvent::ProcessingInstruction {
                name,
                data: data.as_deref(),
            },
            Self::StartElement { name, attributes, namespace } => WriterEvent::StartElement {
                name: name.borrow(),
                attributes: attributes.iter().map(|attribute| attribute.borrow()).collect(),
                namespace: namespace.borrow(),
            },
            Self::EndElement { name } => WriterEvent::EndElement { name: Some(name.borrow()) },
            Self::Comment(text) => WriterEvent::Comment(text),
            Self::CData(text) => WriterEvent::CData(text),
            // Whitespace is written out as ordinary character data.
            Self::Characters(text) | Self::Whitespace(text) => WriterEvent::Characters(text),
        };

        Some(converted)
    }
}