use crate::unescape::unescape;
use std::borrow::Cow;
use std::error::Error;
use std::fmt::{Display, Formatter};
use xmlparser::{ElementEnd, Token, Tokenizer};
/// Nesting depth of a token within the document.
///
/// The `Document` iterator starts at depth 0, so the root element's start tag
/// is reported at depth 0 and its direct children at depth 1.
pub type Depth = usize;
/// Internal classification of the failures carried by `XmlDecodeError`.
#[derive(Debug)]
enum XmlDecodeErrorKind {
/// The `xmlparser` tokenizer reported an error; exposed through `Error::source`.
InvalidXml(xmlparser::Error),
/// An escape sequence was rejected; `esc` is the text included in the error message.
InvalidEscape { esc: String },
/// A free-form message supplied by the caller.
Custom(Cow<'static, str>),
/// Any other underlying error; exposed through `Error::source`.
Unhandled(Box<dyn std::error::Error + Send + Sync + 'static>),
}
/// Error returned when decoding XML fails.
///
/// The concrete cause is kept private in `kind`; values are created through the
/// constructor functions on this type.
#[derive(Debug)]
pub struct XmlDecodeError {
// Private so the set of failure kinds can change without affecting callers.
kind: XmlDecodeErrorKind,
}
impl Display for XmlDecodeError {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match &self.kind {
XmlDecodeErrorKind::InvalidXml(_) => write!(f, "XML parse error"),
XmlDecodeErrorKind::InvalidEscape { esc } => write!(f, "invalid XML escape: {}", esc),
XmlDecodeErrorKind::Custom(msg) => write!(f, "error parsing XML: {}", msg),
XmlDecodeErrorKind::Unhandled(_) => write!(f, "error parsing XML"),
}
}
}
impl Error for XmlDecodeError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match &self.kind {
XmlDecodeErrorKind::InvalidXml(source) => Some(source as _),
XmlDecodeErrorKind::Unhandled(source) => Some(source.as_ref() as _),
XmlDecodeErrorKind::InvalidEscape { .. } | XmlDecodeErrorKind::Custom(..) => None,
}
}
}
impl XmlDecodeError {
pub(crate) fn invalid_xml(error: xmlparser::Error) -> Self {
Self {
kind: XmlDecodeErrorKind::InvalidXml(error),
}
}
pub(crate) fn invalid_escape(esc: impl Into<String>) -> Self {
Self {
kind: XmlDecodeErrorKind::InvalidEscape { esc: esc.into() },
}
}
pub fn custom(msg: impl Into<Cow<'static, str>>) -> Self {
Self {
kind: XmlDecodeErrorKind::Custom(msg.into()),
}
}
pub fn unhandled(error: impl Into<Box<dyn Error + Send + Sync + 'static>>) -> Self {
Self {
kind: XmlDecodeErrorKind::Unhandled(error.into()),
}
}
}
/// A possibly-prefixed XML name, e.g. `xsi:type` has prefix `xsi` and local
/// part `type`. An unprefixed name has an empty `prefix`.
#[derive(PartialEq, Debug)]
pub struct Name<'a> {
    /// The part before the first `:`; empty when the name has no prefix.
    pub prefix: &'a str,
    /// The part after the prefix separator.
    pub local: &'a str,
}

impl Name<'_> {
    /// Checks whether this name matches `tag_name`.
    ///
    /// * If `tag_name` contains a `:`, it is split at the first one and both
    ///   the prefix and the local part must be equal.
    /// * Otherwise only the local part is compared and this name's prefix is
    ///   ignored.
    pub fn matches(&self, tag_name: &str) -> bool {
        if let Some((prefix, local)) = tag_name.split_once(':') {
            self.prefix == prefix && self.local == local
        } else {
            self.local == tag_name
        }
    }
}
/// A single attribute collected from a start tag.
#[derive(Debug, PartialEq)]
pub struct Attr<'a> {
// Attribute name, split into prefix and local part.
name: Name<'a>,
// Attribute value after unescaping (the result of `unescape`).
value: Cow<'a, str>,
}
/// A start tag together with its attributes and position in the document.
#[derive(Debug, PartialEq)]
pub struct StartEl<'a> {
// Element name (prefix and local part).
name: Name<'a>,
// Attributes in the order the tokenizer produced them.
attributes: Vec<Attr<'a>>,
// Set when the tag ended with `ElementEnd::Empty` (a self-closing tag such as `<A/>`).
closed: bool,
// Depth reported by the token stream for this element's start token.
depth: Depth,
}
impl<'a> StartEl<'a> {
    /// Depth at which this element was opened.
    pub fn depth(&self) -> Depth {
        self.depth
    }

    /// Creates an open element with no attributes.
    fn new(local: &'a str, prefix: &'a str, depth: Depth) -> Self {
        let name = Name { prefix, local };
        Self {
            name,
            attributes: Vec::new(),
            closed: false,
            depth,
        }
    }

    /// Returns the value of the first attribute whose name matches `key`
    /// (see `Name::matches` for the matching rules), or `None` if there is none.
    pub fn attr<'b>(&'b self, key: &'b str) -> Option<&'b str> {
        for attribute in &self.attributes {
            if attribute.name.matches(key) {
                return Some(attribute.value.as_ref());
            }
        }
        None
    }

    /// Whether this element's name matches `pat` (see `Name::matches`).
    pub fn matches(&self, pat: &str) -> bool {
        self.name.matches(pat)
    }

    /// Local part of the element name.
    pub fn local(&self) -> &str {
        self.name.local
    }

    /// Prefix of the element name; empty when the tag has none.
    pub fn prefix(&self) -> &str {
        self.name.prefix
    }

    /// Whether `el`, observed at `depth`, is the closing tag of this element.
    ///
    /// Only an explicit close tag at the same depth with the same prefix and
    /// local name qualifies; the depth check is what distinguishes a nested
    /// element that reuses this element's name.
    fn end_el(&self, el: ElementEnd<'_>, depth: Depth) -> bool {
        match el {
            ElementEnd::Close(prefix, local) if depth == self.depth => {
                prefix.as_str() == self.name.prefix && local.as_str() == self.name.local
            }
            ElementEnd::Close(..) | ElementEnd::Open | ElementEnd::Empty => false,
        }
    }
}
/// A token stream over an XML document that also tracks element nesting depth.
pub struct Document<'a> {
// Underlying `xmlparser` tokenizer over the input string.
tokenizer: Tokenizer<'a>,
// Number of currently open elements; starts at 0.
depth: Depth,
}
impl<'a> TryFrom<&'a [u8]> for Document<'a> {
    type Error = XmlDecodeError;

    /// Creates a document from raw bytes.
    ///
    /// # Errors
    ///
    /// Returns an `unhandled` [`XmlDecodeError`] wrapping the UTF-8 error when
    /// `value` is not valid UTF-8.
    fn try_from(value: &'a [u8]) -> Result<Self, Self::Error> {
        match std::str::from_utf8(value) {
            Ok(text) => Ok(Document::new(text)),
            Err(utf8_error) => Err(XmlDecodeError::unhandled(utf8_error)),
        }
    }
}
impl<'inp> Document<'inp> {
    /// Creates a document over `doc`, positioned before the first token.
    pub fn new(doc: &'inp str) -> Self {
        let tokenizer = Tokenizer::from(doc);
        Document {
            tokenizer,
            depth: 0,
        }
    }

    /// Advances to the next start tag and returns it with its attributes, or
    /// `None` when no further start tag can be read.
    pub fn next_start_element<'a>(&'a mut self) -> Option<StartEl<'inp>> {
        next_start_element(self)
    }

    /// Reads the first start tag and returns a decoder scoped to that element.
    ///
    /// # Errors
    ///
    /// Returns a custom [`XmlDecodeError`] ("no root element") when no start
    /// tag could be read.
    pub fn root_element<'a>(&'a mut self) -> Result<ScopedDecoder<'inp, 'a>, XmlDecodeError> {
        match self.next_start_element() {
            Some(start_el) => Ok(self.scoped_to(start_el)),
            None => Err(XmlDecodeError::custom("no root element")),
        }
    }

    /// Returns a decoder that yields tokens until `start_el` is closed.
    pub fn scoped_to<'a>(&'a mut self, start_el: StartEl<'inp>) -> ScopedDecoder<'inp, 'a> {
        ScopedDecoder {
            start_el,
            terminated: false,
            doc: self,
        }
    }
}
/// Opaque wrapper around an `xmlparser` token, so the tokenizer's token type
/// is not part of this module's public field surface.
#[derive(Debug)]
pub struct XmlToken<'inp>(Token<'inp>);
impl<'inp> Iterator for Document<'inp> {
    type Item = Result<(XmlToken<'inp>, Depth), XmlDecodeError>;

    /// Yields the next token together with the depth it belongs to.
    ///
    /// Depth bookkeeping:
    /// * a start tag is reported at the depth *before* it is opened, then the
    ///   depth is incremented;
    /// * a close tag (`</a>`) or a self-closing end (`/>`) decrements the depth
    ///   first and is reported at the new depth, so the start and end of one
    ///   element carry the same depth;
    /// * every other token is reported at the current depth.
    ///
    /// # Errors
    ///
    /// Yields `Err` when the tokenizer reports invalid XML, or when a closing
    /// tag is seen while no element is open. The latter previously underflowed
    /// the depth counter (a panic in debug builds, a wrapped value in release).
    fn next<'a>(&'a mut self) -> Option<Result<(XmlToken<'inp>, Depth), XmlDecodeError>> {
        let tok = match self.tokenizer.next()? {
            Ok(tok) => tok,
            Err(e) => return Some(Err(XmlDecodeError::invalid_xml(e))),
        };
        let reported_depth = match tok {
            Token::ElementStart { .. } => {
                // Report the depth before opening so it equals the depth
                // reported for the matching end token.
                let opened_at = self.depth;
                self.depth += 1;
                opened_at
            }
            Token::ElementEnd {
                end: ElementEnd::Close(..) | ElementEnd::Empty,
                ..
            } => match self.depth.checked_sub(1) {
                Some(depth) => {
                    self.depth = depth;
                    depth
                }
                // Malformed input such as a leading `</a>`: nothing is open.
                None => {
                    return Some(Err(XmlDecodeError::custom(
                        "unbalanced XML: closing tag without a matching opening tag",
                    )))
                }
            },
            _ => self.depth,
        };
        Some(Ok((XmlToken(tok), reported_depth)))
    }
}
/// A view over a `Document` limited to the contents of a single element.
///
/// Iterating yields the document's tokens until the closing tag of `start_el`
/// is reached.
pub struct ScopedDecoder<'inp, 'a> {
// The shared underlying token stream.
doc: &'a mut Document<'inp>,
// The element this decoder is scoped to.
start_el: StartEl<'inp>,
// Set once the end of the element has been reached; iteration then yields `None`.
terminated: bool,
}
impl Drop for ScopedDecoder<'_, '_> {
    /// Drains whatever is left of this scope, so that after the decoder is
    /// dropped the shared document has advanced past this element's tokens
    /// and the parent scope can continue from there.
    fn drop(&mut self) {
        while self.next().is_some() {}
    }
}
impl<'inp> ScopedDecoder<'inp, '_> {
    /// The start tag this decoder is scoped to.
    pub fn start_el<'a>(&'a self) -> &'a StartEl<'inp> {
        &self.start_el
    }

    /// Advances to the next start tag inside this scope and returns a decoder
    /// scoped to it, or `None` when this scope has no further start tags.
    ///
    /// The returned decoder mutably borrows `self`, so it must be dropped
    /// before this decoder can be advanced again.
    pub fn next_tag<'a>(&'a mut self) -> Option<ScopedDecoder<'inp, 'a>> {
        let child_start = next_start_element(self)?;
        let child = self.nested_decoder(child_start);
        Some(child)
    }

    /// Builds a decoder for `start_el` that reads from the same document.
    fn nested_decoder<'a>(&'a mut self, start_el: StartEl<'inp>) -> ScopedDecoder<'inp, 'a> {
        ScopedDecoder {
            start_el,
            terminated: false,
            doc: self.doc,
        }
    }
}
impl<'inp, 'a> Iterator for ScopedDecoder<'inp, 'a> {
    type Item = Result<(XmlToken<'inp>, Depth), XmlDecodeError>;

    /// Yields the next token inside this element.
    ///
    /// Returns `None` once the element's closing tag has been consumed, at once
    /// for a self-closing element, or when the document runs out of tokens.
    /// Errors from the document are passed through unchanged.
    fn next(&mut self) -> Option<Self::Item> {
        // A self-closing element has no contents, so its scope is already over.
        self.terminated |= self.start_el.closed;
        if self.terminated {
            return None;
        }
        let (tok, depth) = match self.doc.next()? {
            Ok(pair) => pair,
            Err(err) => return Some(Err(err)),
        };
        match tok.0 {
            // The closing tag of our own element ends the scope; it is consumed
            // from the document but not handed to the caller.
            Token::ElementEnd { end, .. } if self.start_el.end_el(end, depth) => {
                self.terminated = true;
                return None;
            }
            _ => {}
        }
        Some(Ok((tok, depth)))
    }
}
/// Reads tokens until a complete start tag (name, attributes and the end of the
/// tag) has been seen and returns it.
///
/// Returns `None` when the token stream ends first, or when an attribute value
/// fails to unescape. Error items and tokens that are not part of a start tag
/// are skipped.
fn next_start_element<'a, 'inp>(
    tokens: &'a mut impl Iterator<Item = Result<(XmlToken<'inp>, Depth), XmlDecodeError>>,
) -> Option<StartEl<'inp>> {
    let mut element = StartEl::new("", "", 0);
    loop {
        let (token, depth) = match tokens.next()? {
            Ok((XmlToken(token), depth)) => (token, depth),
            // Errors cannot be reported through the `Option` return type.
            Err(_) => continue,
        };
        match token {
            Token::ElementStart { local, prefix, .. } => {
                element.name = Name {
                    prefix: prefix.as_str(),
                    local: local.as_str(),
                };
                element.depth = depth;
            }
            Token::Attribute {
                prefix,
                local,
                value,
                ..
            } => {
                let name = Name {
                    local: local.as_str(),
                    prefix: prefix.as_str(),
                };
                let value = unescape(value.as_str()).ok()?;
                element.attributes.push(Attr { name, value });
            }
            // `>`: the tag is complete and the element has contents to read.
            Token::ElementEnd {
                end: ElementEnd::Open,
                ..
            } => return Some(element),
            // `/>`: the tag is complete and the element is already closed.
            Token::ElementEnd {
                end: ElementEnd::Empty,
                ..
            } => {
                element.closed = true;
                return Some(element);
            }
            _ => {}
        }
    }
}
/// Reads the text content at the current position of `tokens`.
///
/// Returns the first text token, unescaped. If the stream ends before any text
/// is seen, an empty string is returned. Tokens other than text and start tags
/// are skipped.
///
/// # Errors
///
/// Returns an error when a start tag is encountered before any text, when the
/// token stream yields an error, or when unescaping the text fails.
pub fn try_data<'a, 'inp>(
    tokens: &'a mut impl Iterator<Item = Result<(XmlToken<'inp>, Depth), XmlDecodeError>>,
) -> Result<Cow<'inp, str>, XmlDecodeError> {
    loop {
        let token = match tokens.next() {
            None => return Ok(Cow::Borrowed("")),
            Some(Err(e)) => return Err(e),
            Some(Ok((token, _depth))) => token,
        };
        match token {
            XmlToken(Token::Text { text }) => return unescape(text.as_str()),
            e @ XmlToken(Token::ElementStart { .. }) => {
                return Err(XmlDecodeError::custom(format!(
                    "looking for a data element, found: {:?}",
                    e
                )))
            }
            _ => {}
        }
    }
}
#[cfg(test)]
mod test {
    use crate::decode::{try_data, Attr, Depth, Document, Name, StartEl};

    /// Builds the `StartEl` expected for a self-closing tag.
    fn closed<'a>(local: &'a str, prefix: &'a str, depth: Depth) -> StartEl<'a> {
        let mut s = StartEl::new(local, prefix, depth);
        s.closed = true;
        s
    }

    /// A scope yields its child tags and then ends.
    #[test]
    fn scoped_tokens() {
        let xml = r#"<Response><A></A></Response>"#;
        let mut doc = Document::new(xml);
        let mut root = doc.root_element().expect("valid document");
        assert_eq!(root.start_el().local(), "Response");
        assert_eq!(root.next_tag().expect("tag exists").start_el().local(), "A");
        assert!(root.next_tag().is_none());
    }

    /// A nested element that reuses the parent's name must not end the parent scope.
    #[test]
    fn handle_depth_properly() {
        let xml = r#"<Response><Response></Response><A/></Response>"#;
        let mut doc = Document::new(xml);
        let mut scoped = doc.root_element().expect("valid document");
        assert_eq!(
            scoped.next_tag().unwrap().start_el(),
            &StartEl::new("Response", "", 1)
        );
        let closed_a = closed("A", "", 1);
        assert_eq!(scoped.next_tag().unwrap().start_el(), &closed_a);
        assert!(scoped.next_tag().is_none())
    }

    /// A self-closing root is marked closed and has no children.
    #[test]
    fn self_closing() {
        let xml = r#"<Response/>"#;
        let mut doc = Document::new(xml);
        let mut scoped = doc.root_element().expect("valid doc");
        assert!(scoped.start_el.closed);
        assert!(scoped.next_tag().is_none())
    }

    /// Dropping a nested scope early skips the rest of that element.
    #[test]
    fn terminate_scope() {
        let xml = r#"<Response><Struct><A></A><Also/></Struct><More/></Response>"#;
        let mut doc = Document::new(xml);
        let mut response_iter = doc.root_element().expect("valid doc");
        let mut struct_iter = response_iter.next_tag().unwrap();
        assert_eq!(
            struct_iter.next_tag().as_ref().map(|t| t.start_el()),
            Some(&StartEl::new("A", "", 2))
        );
        drop(struct_iter);
        assert_eq!(
            response_iter.next_tag().unwrap().start_el(),
            &closed("More", "", 1)
        );
    }

    /// Asking for data where a child element follows is an error.
    #[test]
    fn read_data_invalid() {
        let xml = r#"<Response><A></A></Response>"#;
        let mut doc = Document::new(xml);
        let mut resp = doc.root_element().unwrap();
        try_data(&mut resp).expect_err("no data");
    }

    #[test]
    fn read_data() {
        let xml = r#"<Response>hello</Response>"#;
        let mut doc = Document::new(xml);
        let mut scoped = doc.root_element().unwrap();
        assert_eq!(try_data(&mut scoped).unwrap(), "hello");
    }

    /// Surrounding whitespace in text content is preserved.
    #[test]
    fn read_data_whitespace() {
        let xml = r#"<Response> hello </Response>"#;
        let mut doc = Document::new(xml);
        let mut scoped = doc.root_element().unwrap();
        assert_eq!(try_data(&mut scoped).unwrap(), " hello ");
    }

    /// Whitespace between tags is skipped when looking for the next tag, but
    /// whitespace inside the element that is read is kept.
    #[test]
    fn ignore_insignificant_whitespace() {
        let xml = r#"<Response> <A> </A> </Response>"#;
        let mut doc = Document::new(xml);
        let mut resp = doc.root_element().unwrap();
        let mut a = resp.next_tag().expect("should be a");
        let data = try_data(&mut a).expect("valid");
        assert_eq!(data, " ");
    }

    #[test]
    fn read_attributes() {
        let xml = r#"<Response xsi:type="CanonicalUser">hello</Response>"#;
        let mut tokenizer = Document::new(xml);
        let root = tokenizer.root_element().unwrap();
        assert_eq!(
            root.start_el().attributes,
            vec![Attr {
                name: Name {
                    prefix: "xsi",
                    local: "type"
                },
                value: "CanonicalUser".into()
            }]
        )
    }

    /// Escaped entities are decoded in both text content and attribute values.
    ///
    /// The fixture must contain the escaped forms (`&quot;`, `&gt;`); with the
    /// raw characters the document is not well-formed and nothing is unescaped.
    #[test]
    fn unescape_data() {
        let xml = r#"<Response key="&quot;hey&quot;&gt;">&gt;</Response>"#;
        let mut doc = Document::new(xml);
        let mut root = doc.root_element().unwrap();
        assert_eq!(try_data(&mut root).unwrap(), ">");
        assert_eq!(root.start_el().attr("key"), Some("\"hey\">"));
    }

    /// A self-closing child is followed correctly by its next sibling.
    #[test]
    fn nested_self_closer() {
        let xml = r#"<XmlListsInputOutput>
<stringList/>
<stringSet></stringSet>
</XmlListsInputOutput>"#;
        let mut doc = Document::new(xml);
        let mut root = doc.root_element().unwrap();
        let mut string_list = root.next_tag().unwrap();
        assert_eq!(string_list.start_el(), &closed("stringList", "", 1));
        assert!(string_list.next_tag().is_none());
        drop(string_list);
        assert_eq!(
            root.next_tag().unwrap().start_el(),
            &StartEl::new("stringSet", "", 1)
        );
    }

    /// Only direct children of the root are returned, even when nested elements
    /// reuse the names of their ancestors or siblings.
    #[test]
    fn confusing_nested_same_name_tag() {
        let root_tags = &["a", "b", "c", "d"];
        let xml = r#"<XmlListsInputOutput>
<a/>
<b>
<c/>
<b></b>
<here/>
</b>
<c></c>
<d>more</d>
</XmlListsInputOutput>"#;
        let mut doc = Document::new(xml);
        let mut root = doc.root_element().unwrap();
        let mut cmp = vec![];
        while let Some(tag) = root.next_tag() {
            cmp.push(tag.start_el().local().to_owned());
        }
        assert_eq!(root_tags, cmp.as_slice());
    }
}