use std::borrow::{Cow, Borrow};
use std::error;
use std::fmt;
use std::mem;
use std::str;
use xmlparser::{
self,
Reference,
Stream,
StrSpan,
TextPos,
};
use crate::{
NS_XML_URI,
NS_XMLNS_URI,
Attribute,
Document,
ExpandedNameOwned,
Namespaces,
NodeData,
NodeId,
NodeKind,
PI,
ShortRange,
};
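/// A list of all possible XML parsing errors.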
#[derive(Debug)]
pub enum Error {
InvalidXmlPrefixUri(TextPos),
UnexpectedXmlUri(TextPos),
UnexpectedXmlnsUri(TextPos),
InvalidElementNamePrefix(TextPos),
DuplicatedNamespace(String, TextPos),
UnknownNamespace(String, TextPos),
#[allow(missing_docs)]
UnexpectedCloseTag { expected: String, actual: String, pos: TextPos },
UnexpectedEntityCloseTag(TextPos),
UnknownEntityReference(String, TextPos),
MalformedEntityReference(TextPos),
EntityReferenceLoop(TextPos),
InvalidAttributeValue(TextPos),
DuplicatedAttribute(String, TextPos),
NoRootNode,
SizeLimit,
ParserError(xmlparser::Error),
}
impl Error {
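/// Returns the position of the error. Errors that do not carry a position
/// (`NoRootNode`, `SizeLimit`) report `1:1`.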
#[inline]
pub fn pos(&self) -> TextPos {
match *self {
Error::InvalidXmlPrefixUri(pos) => pos,
Error::UnexpectedXmlUri(pos) => pos,
Error::UnexpectedXmlnsUri(pos) => pos,
Error::InvalidElementNamePrefix(pos) => pos,
Error::DuplicatedNamespace(ref _name, pos) => pos,
Error::UnknownNamespace(ref _name, pos) => pos,
Error::UnexpectedCloseTag { pos, .. } => pos,
Error::UnexpectedEntityCloseTag(pos) => pos,
Error::UnknownEntityReference(ref _name, pos) => pos,
Error::MalformedEntityReference(pos) => pos,
Error::EntityReferenceLoop(pos) => pos,
Error::InvalidAttributeValue(pos) => pos,
Error::DuplicatedAttribute(ref _name, pos) => pos,
Error::ParserError(ref err) => err.pos(),
_ => TextPos::new(1, 1)
}
}
}
impl From<xmlparser::Error> for Error {
#[inline]
fn from(e: xmlparser::Error) -> Self {
Error::ParserError(e)
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::InvalidXmlPrefixUri(pos) => {
write!(f, "'xml' namespace prefix mapped to wrong URI at {}", pos)
}
Error::UnexpectedXmlUri(pos) => {
write!(f, "the 'xml' namespace URI is used for not 'xml' prefix at {}", pos)
}
Error::UnexpectedXmlnsUri(pos) => {
write!(f, "the 'xmlns' URI is used at {}, but it must not be declared", pos)
}
Error::InvalidElementNamePrefix(pos) => {
write!(f, "the 'xmlns' prefix is used at {}, but it must not be", pos)
}
Error::DuplicatedNamespace(ref name, pos) => {
write!(f, "namespace '{}' at {} is already defined", name, pos)
}
Error::UnknownNamespace(ref name, pos) => {
write!(f, "an unknown namespace prefix '{}' at {}", name, pos)
}
Error::UnexpectedCloseTag { ref expected, ref actual, pos } => {
write!(f, "expected '{}' tag, not '{}' at {}", expected, actual, pos)
}
Error::UnexpectedEntityCloseTag(pos) => {
write!(f, "unexpected close tag at {}", pos)
}
Error::MalformedEntityReference(pos) => {
write!(f, "malformed entity reference at {}", pos)
}
Error::UnknownEntityReference(ref name, pos) => {
write!(f, "unknown entity reference '{}' at {}", name, pos)
}
Error::EntityReferenceLoop(pos) => {
write!(f, "a possible entity reference loop is detected at {}", pos)
}
Error::InvalidAttributeValue(pos) => {
write!(f, "unescaped '<' found at {}", pos)
}
Error::DuplicatedAttribute(ref name, pos) => {
write!(f, "attribute '{}' at {} is already defined", name, pos)
}
Error::NoRootNode => {
write!(f, "the document does not have a root node")
}
Error::SizeLimit => {
write!(f, "the input string should be smaller than 4GiB")
}
Error::ParserError(ref err) => {
write!(f, "{}", err)
}
}
}
}
impl error::Error for Error {
#[inline]
fn description(&self) -> &str {
"an XML parsing error"
}
}
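// A raw attribute collected from the tokenizer; its namespace is resolved
// later, in `resolve_attributes`.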
struct AttributeData<'input> {
prefix: StrSpan<'input>,
local: StrSpan<'input>,
value: Cow<'input, str>,
range: ShortRange,
value_range: ShortRange,
}
impl<'input> Document<'input> {
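/// Parses the input XML string into a `Document`.
///
/// The input must be smaller than 4GiB and must contain at least one element,
/// otherwise `Error::SizeLimit` / `Error::NoRootNode` is returned.
///
/// A minimal usage sketch (the exact crate re-export path is assumed):
///
/// ```ignore
/// let doc = Document::parse("<e a='1'/>")?;
/// assert!(doc.root().children().any(|n| n.is_element()));
/// ```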
#[inline]
pub fn parse(text: &str) -> Result<Document, Error> {
parse(text)
}
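// Appends a new node as the last child of `parent_id` and fixes up the
// sibling links. Nodes queued in `pd.awaiting_subtree` get their
// `next_subtree` pointer set to the new node. Non-element nodes queue
// themselves immediately; elements are queued only once their subtree is
// closed (see `process_element`).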
fn append(
&mut self,
parent_id: NodeId,
kind: NodeKind<'input>,
range: ShortRange,
pd: &mut ParserData,
) -> NodeId {
let new_child_id = NodeId::from(self.nodes.len());
let appending_element = match kind {
NodeKind::Element {..} => true,
_ => false
};
self.nodes.push(NodeData {
parent: Some(parent_id),
prev_sibling: None,
next_subtree: None,
last_child: None,
kind,
range,
});
let last_child_id = self.nodes[parent_id.get_usize()].last_child;
self.nodes[new_child_id.get_usize()].prev_sibling = last_child_id;
self.nodes[parent_id.get_usize()].last_child = Some(new_child_id);
pd.awaiting_subtree.iter().for_each(|id| {
self.nodes[id.get_usize()].next_subtree = Some(new_child_id);
});
pd.awaiting_subtree.clear();
if !appending_element {
pd.awaiting_subtree.push(new_child_id);
}
new_child_id
}
}
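// An entity declared in the internal DTD subset: `<!ENTITY name "value">`.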
struct Entity<'input> {
name: &'input str,
value: StrSpan<'input>,
}
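// Mutable parser state shared across token processing:
// - `attrs_start_idx` / `ns_start_idx`: start of the current element's
//   attributes/namespaces inside `doc.attrs` / `doc.namespaces`
// - `tmp_attrs`: attributes collected since the last start tag
// - `awaiting_subtree`: nodes whose `next_subtree` link is still unset
// - `entities`: entities declared in the internal DTD subset
// - `buffer`: scratch buffer for text and attribute-value normalization
// - `after_text`: whether the previously appended node was a text node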
struct ParserData<'input> {
attrs_start_idx: usize,
ns_start_idx: usize,
tmp_attrs: Vec<AttributeData<'input>>,
awaiting_subtree: Vec<NodeId>,
entities: Vec<Entity<'input>>,
buffer: TextBuffer,
after_text: bool,
}
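// The name of the currently open start tag, kept until the matching
// `ElementEnd` token arrives.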
#[derive(Clone, Copy)]
struct TagNameSpan<'input> {
prefix: StrSpan<'input>,
name: StrSpan<'input>,
span: StrSpan<'input>,
}
impl<'input> TagNameSpan<'input> {
#[inline]
fn new_null() -> Self {
Self {
prefix: StrSpan::from(""),
name: StrSpan::from(""),
span: StrSpan::from(""),
}
}
#[inline]
fn new(prefix: StrSpan<'input>, name: StrSpan<'input>, span: StrSpan<'input>) -> Self {
Self { prefix, name, span }
}
}
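// Guards against entity reference loops ("billion laughs"): entity expansion
// may nest at most 10 levels deep, and a single top-level expansion may
// resolve at most `u8::MAX` references.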
#[derive(Default)]
struct LoopDetector {
depth: u8,
references: u8,
}
impl LoopDetector {
#[inline]
fn inc_depth(&mut self, s: &Stream) -> Result<(), Error> {
if self.depth < 10 {
self.depth += 1;
Ok(())
} else {
Err(Error::EntityReferenceLoop(s.gen_text_pos()))
}
}
#[inline]
fn dec_depth(&mut self) {
if self.depth > 0 {
self.depth -= 1;
}
if self.depth == 0 {
self.references = 0;
}
}
#[inline]
fn inc_references(&mut self, s: &Stream) -> Result<(), Error> {
if self.depth == 0 {
Ok(())
} else {
if self.references == std::u8::MAX {
return Err(Error::EntityReferenceLoop(s.gen_text_pos()));
}
self.references += 1;
Ok(())
}
}
}
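// The parsing entry point: rejects inputs larger than `u32::MAX` bytes
// (ranges are stored as `u32`), pre-allocates nodes and attributes based on
// the number of '<' and '=' characters, pushes the root node and the
// implicit 'xml' namespace, runs the tokenizer, and finally requires that
// the document contains at least one element.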
fn parse(text: &str) -> Result<Document, Error> {
if text.len() > std::u32::MAX as usize {
return Err(Error::SizeLimit);
}
let mut pd = ParserData {
attrs_start_idx: 0,
ns_start_idx: 1,
tmp_attrs: Vec::with_capacity(16),
entities: Vec::new(),
awaiting_subtree: Vec::new(),
buffer: TextBuffer::new(),
after_text: false,
};
let nodes_capacity = text.bytes().filter(|c| *c == b'<').count();
let attributes_capacity = text.bytes().filter(|c| *c == b'=').count();
let mut doc = Document {
text,
nodes: Vec::with_capacity(nodes_capacity),
attrs: Vec::with_capacity(attributes_capacity),
namespaces: Namespaces(Vec::new()),
};
doc.nodes.push(NodeData {
parent: None,
prev_sibling: None,
next_subtree: None,
last_child: None,
kind: NodeKind::Root,
range: (0..text.len()).into(),
});
doc.namespaces.push_ns(Some("xml"), Cow::Borrowed(NS_XML_URI));
let parser = xmlparser::Tokenizer::from(text);
let parent_id = doc.root().id;
let mut tag_name = TagNameSpan::new_null();
process_tokens(parser, parent_id, &mut LoopDetector::default(),
&mut tag_name, &mut pd, &mut doc)?;
if !doc.root().children().any(|n| n.is_element()) {
return Err(Error::NoRootNode);
}
doc.nodes.shrink_to_fit();
doc.attrs.shrink_to_fit();
doc.namespaces.0.shrink_to_fit();
Ok(doc)
}
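// The main token loop. Also called recursively from `process_text` to
// expand entity values that contain markup.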
fn process_tokens<'input>(
parser: xmlparser::Tokenizer<'input>,
mut parent_id: NodeId,
loop_detector: &mut LoopDetector,
tag_name: &mut TagNameSpan<'input>,
pd: &mut ParserData<'input>,
doc: &mut Document<'input>,
) -> Result<(), Error> {
for token in parser {
let token = token?;
match token {
xmlparser::Token::ProcessingInstruction { target, content, span } => {
let pi = NodeKind::PI(PI {
target: target.as_str(),
value: content.map(|v| v.as_str()),
});
doc.append(parent_id, pi, span.range().into(), pd);
}
xmlparser::Token::Comment { text, span } => {
doc.append(parent_id, NodeKind::Comment(text.as_str()), span.range().into(), pd);
}
xmlparser::Token::Text { text } => {
process_text(text, parent_id, loop_detector, pd, doc)?;
}
xmlparser::Token::Cdata { text, span } => {
let cow_str = Cow::Borrowed(text.as_str());
append_text(cow_str, parent_id, span.range().into(), pd.after_text, doc, pd);
pd.after_text = true;
}
xmlparser::Token::ElementStart { prefix, local, span } => {
if prefix.as_str() == "xmlns" {
let pos = err_pos_from_span(doc.text, prefix);
return Err(Error::InvalidElementNamePrefix(pos));
}
*tag_name = TagNameSpan::new(prefix, local, span);
}
xmlparser::Token::Attribute { prefix, local, value, span } => {
process_attribute(prefix, local, value, span, loop_detector, pd, doc)?;
}
xmlparser::Token::ElementEnd { end, span } => {
process_element(*tag_name, end, span, &mut parent_id, pd, doc)?;
}
xmlparser::Token::EntityDeclaration { name, definition, .. } => {
if let xmlparser::EntityDefinition::EntityValue(value) = definition {
pd.entities.push(Entity { name: name.as_str(), value });
}
}
_ => {}
}
match token {
xmlparser::Token::ProcessingInstruction { .. } |
xmlparser::Token::Comment { .. } |
xmlparser::Token::ElementStart { .. } |
xmlparser::Token::ElementEnd { .. } => {
pd.after_text = false;
}
_ => {}
}
}
Ok(())
}
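// Handles a single attribute token: `xmlns`/`xmlns:prefix` declarations are
// validated (the reserved 'xml' and 'xmlns' names) and pushed to
// `doc.namespaces`; ordinary attributes are stashed in `pd.tmp_attrs` until
// the element end token, when their prefixes can be resolved.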
fn process_attribute<'input>(
prefix: StrSpan<'input>,
local: StrSpan<'input>,
value: StrSpan<'input>,
token_span: StrSpan<'input>,
loop_detector: &mut LoopDetector,
pd: &mut ParserData<'input>,
doc: &mut Document<'input>,
) -> Result<(), Error> {
let range = token_span.range().into();
let value_range = value.range().into();
let value = normalize_attribute(doc.text, value, &pd.entities, loop_detector, &mut pd.buffer)?;
if prefix.as_str() == "xmlns" {
if value == NS_XMLNS_URI {
let pos = err_pos_from_qname(doc.text, prefix, local);
return Err(Error::UnexpectedXmlnsUri(pos));
}
let is_xml_ns_uri = value == NS_XML_URI;
if local.as_str() == "xml" {
if !is_xml_ns_uri {
let pos = err_pos_from_span(doc.text, prefix);
return Err(Error::InvalidXmlPrefixUri(pos));
}
} else if is_xml_ns_uri {
let pos = err_pos_from_span(doc.text, prefix);
return Err(Error::UnexpectedXmlUri(pos));
}
if doc.namespaces.exists(pd.ns_start_idx, Some(local.as_str())) {
let pos = err_pos_from_qname(doc.text, prefix, local);
return Err(Error::DuplicatedNamespace(local.as_str().to_string(), pos));
}
if !is_xml_ns_uri {
doc.namespaces.push_ns(Some(local.as_str()), value);
}
} else if local.as_str() == "xmlns" {
if value == NS_XML_URI {
let pos = err_pos_from_span(doc.text, local);
return Err(Error::UnexpectedXmlUri(pos));
}
if value == NS_XMLNS_URI {
let pos = err_pos_from_span(doc.text, local);
return Err(Error::UnexpectedXmlnsUri(pos));
}
doc.namespaces.push_ns(None, value);
} else {
pd.tmp_attrs.push(AttributeData {
prefix, local, value, range, value_range
});
}
Ok(())
}
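// Handles an `ElementEnd` token: resolves the namespaces and attributes
// collected since the start tag, then either appends a self-closing element,
// closes the currently open element (checking that the close tag matches),
// or opens a new element and makes it the current parent.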
fn process_element<'input>(
tag_name: TagNameSpan<'input>,
end_token: xmlparser::ElementEnd<'input>,
token_span: StrSpan<'input>,
parent_id: &mut NodeId,
pd: &mut ParserData<'input>,
doc: &mut Document<'input>,
) -> Result<(), Error> {
if tag_name.name.is_empty() {
if let xmlparser::ElementEnd::Close(..) = end_token {
return Err(Error::UnexpectedEntityCloseTag(err_pos_from_span(doc.text, token_span)));
} else {
unreachable!("should be already checked by the xmlparser");
}
}
let namespaces = resolve_namespaces(pd.ns_start_idx, *parent_id, doc);
pd.ns_start_idx = doc.namespaces.len();
let attributes = resolve_attributes(pd.attrs_start_idx, namespaces.clone(),
&mut pd.tmp_attrs, doc)?;
pd.attrs_start_idx = doc.attrs.len();
pd.tmp_attrs.clear();
match end_token {
xmlparser::ElementEnd::Empty => {
let tag_ns_uri = get_ns_by_prefix(doc, namespaces.clone(), tag_name.prefix)?;
let new_element_id = doc.append(*parent_id,
NodeKind::Element {
tag_name: ExpandedNameOwned {
ns: tag_ns_uri,
prefix: tag_name.prefix.as_str(),
name: tag_name.name.as_str(),
},
attributes,
namespaces,
},
(tag_name.span.start()..token_span.end()).into(),
pd
);
pd.awaiting_subtree.push(new_element_id);
}
xmlparser::ElementEnd::Close(prefix, local) => {
let prefix = prefix.as_str();
let local = local.as_str();
doc.nodes[parent_id.get_usize()].range.end = token_span.end() as u32;
if let NodeKind::Element { ref tag_name, .. } = doc.nodes[parent_id.get_usize()].kind {
if prefix != tag_name.prefix || local != tag_name.name {
return Err(Error::UnexpectedCloseTag {
expected: gen_qname_string(tag_name.prefix, tag_name.name),
actual: gen_qname_string(prefix, local),
pos: err_pos_from_span(doc.text, token_span),
});
}
}
pd.awaiting_subtree.push(*parent_id);
if let Some(id) = doc.nodes[parent_id.get_usize()].parent {
*parent_id = id;
} else {
unreachable!("should be already checked by the xmlparser");
}
}
xmlparser::ElementEnd::Open => {
let tag_ns_uri = get_ns_by_prefix(doc, namespaces.clone(), tag_name.prefix)?;
*parent_id = doc.append(*parent_id,
NodeKind::Element {
tag_name: ExpandedNameOwned {
ns: tag_ns_uri,
prefix: tag_name.prefix.as_str(),
name: tag_name.name.as_str(),
},
attributes,
namespaces,
},
(tag_name.span.start()..token_span.end()).into(),
pd
);
}
}
Ok(())
}
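// Returns the namespace range for the element being created. If no new
// namespaces were declared on it, the parent's range is reused; otherwise
// the parent's namespaces that were not shadowed are copied after the newly
// declared ones.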
fn resolve_namespaces(
start_idx: usize,
parent_id: NodeId,
doc: &mut Document,
) -> ShortRange {
if let NodeKind::Element { ref namespaces, .. } = doc.nodes[parent_id.get_usize()].kind {
let parent_ns = namespaces.clone();
if start_idx == doc.namespaces.len() {
return parent_ns;
}
for i in parent_ns.to_urange() {
if !doc.namespaces.exists(start_idx, doc.namespaces[i].name) {
let v = doc.namespaces[i].clone();
doc.namespaces.0.push(v);
}
}
}
(start_idx..doc.namespaces.len()).into()
}
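// Moves `tmp_attrs` into `doc.attrs`, resolving each attribute's prefix
// against the element's namespaces and rejecting duplicated attributes.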
fn resolve_attributes<'input>(
start_idx: usize,
namespaces: ShortRange,
tmp_attrs: &mut [AttributeData<'input>],
doc: &mut Document<'input>,
) -> Result<ShortRange, Error> {
if tmp_attrs.is_empty() {
return Ok(ShortRange::new(0, 0));
}
for attr in tmp_attrs {
let ns = if attr.prefix.as_str() == "xml" {
Some(Cow::Borrowed(NS_XML_URI))
} else if attr.prefix.is_empty() {
None
} else {
get_ns_by_prefix(doc, namespaces.clone(), attr.prefix)?
};
let attr_name = ExpandedNameOwned { ns, prefix: "", name: attr.local.as_str() };
if doc.attrs[start_idx..].iter().any(|a| a.name == attr_name) {
let pos = err_pos_from_qname(doc.text, attr.prefix, attr.local);
return Err(Error::DuplicatedAttribute(attr.local.to_string(), pos));
}
doc.attrs.push(Attribute {
name: attr_name,
value: mem::replace(&mut attr.value, Cow::Borrowed("")),
range: attr.range.clone(),
value_range: attr.value_range.clone(),
});
}
Ok((start_idx..doc.attrs.len()).into())
}
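// Appends a text node. Text without references or '\r' is borrowed as-is;
// otherwise character and entity references are expanded into `pd.buffer`
// (normalizing newlines on the way), and entity values that contain markup
// are re-tokenized via `process_tokens`.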
fn process_text<'input>(
text: StrSpan<'input>,
parent_id: NodeId,
loop_detector: &mut LoopDetector,
pd: &mut ParserData<'input>,
doc: &mut Document<'input>,
) -> Result<(), Error> {
if !text.as_str().bytes().any(|b| b == b'&' || b == b'\r') {
append_text(Cow::Borrowed(text.as_str()), parent_id, text.range().into(), pd.after_text, doc, pd);
pd.after_text = true;
return Ok(());
}
pd.buffer.clear();
let mut is_as_is = false;
let mut s = Stream::from_substr(doc.text, text.range());
while !s.at_end() {
match parse_next_chunk(&mut s, &pd.entities)? {
NextChunk::Byte(c) => {
if is_as_is {
pd.buffer.push_raw(c);
is_as_is = false;
} else {
pd.buffer.push_from_text(c, s.at_end());
}
}
NextChunk::Char(c) => {
for b in CharToBytes::new(c) {
if loop_detector.depth > 0 {
pd.buffer.push_from_text(b, s.at_end());
} else {
pd.buffer.push_raw(b);
is_as_is = true;
}
}
}
NextChunk::Text(fragment) => {
is_as_is = false;
if !pd.buffer.is_empty() {
let cow_text = Cow::Owned(pd.buffer.to_str().to_owned());
append_text(cow_text, parent_id, text.range().into(), pd.after_text, doc, pd);
pd.after_text = true;
pd.buffer.clear();
}
loop_detector.inc_references(&s)?;
loop_detector.inc_depth(&s)?;
let parser = xmlparser::Tokenizer::from_fragment(doc.text, fragment.range());
let mut tag_name = TagNameSpan::new_null();
process_tokens(parser, parent_id, loop_detector, &mut tag_name, pd, doc)?;
pd.buffer.clear();
loop_detector.dec_depth();
}
}
}
if !pd.buffer.is_empty() {
let cow_text = Cow::Owned(pd.buffer.to_str().to_owned());
append_text(cow_text, parent_id, text.range().into(), pd.after_text, doc, pd);
pd.after_text = true;
pd.buffer.clear();
}
Ok(())
}
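// Appends a text node, or merges the text into the previous node when that
// node is already a text node (e.g. text split by a CDATA section or an
// entity reference).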
fn append_text<'input>(
text: Cow<'input, str>,
parent_id: NodeId,
range: ShortRange,
after_text: bool,
doc: &mut Document<'input>,
pd: &mut ParserData<'input>
) {
if after_text {
if let Some(node) = doc.nodes.last_mut() {
if let NodeKind::Text(ref mut prev_text) = node.kind {
match *prev_text {
Cow::Borrowed(..) => {
*prev_text = Cow::Owned((*prev_text).to_string() + text.borrow());
}
Cow::Owned(ref mut s) => {
s.push_str(text.borrow());
}
}
}
}
} else {
doc.append(parent_id, NodeKind::Text(text), range, pd);
}
}
enum NextChunk<'a> {
Byte(u8),
Char(char),
Text(StrSpan<'a>),
}
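// Returns the next chunk of a text node: a plain byte, an expanded character
// reference, or the value of an entity reference.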
fn parse_next_chunk<'a>(
s: &mut Stream<'a>,
entities: &[Entity<'a>],
) -> Result<NextChunk<'a>, Error> {
debug_assert!(!s.at_end());
let c = s.curr_byte_unchecked();
if c == b'&' {
let start = s.pos();
match s.try_consume_reference() {
Some(Reference::Char(ch)) => {
Ok(NextChunk::Char(ch))
}
Some(Reference::Entity(name)) => {
match entities.iter().find(|e| e.name == name) {
Some(entity) => {
Ok(NextChunk::Text(entity.value))
}
None => {
let pos = s.gen_text_pos_from(start);
Err(Error::UnknownEntityReference(name.into(), pos))
}
}
}
None => {
let pos = s.gen_text_pos_from(start);
Err(Error::MalformedEntityReference(pos))
}
}
} else {
s.advance(1);
Ok(NextChunk::Byte(c))
}
}
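// Normalizes an attribute value. Values that contain no references, tabs,
// or newlines are borrowed as-is; otherwise the normalized value is built
// in `buffer` and returned as an owned string.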
fn normalize_attribute<'input>(
input: &'input str,
text: StrSpan<'input>,
entities: &[Entity],
loop_detector: &mut LoopDetector,
buffer: &mut TextBuffer,
) -> Result<Cow<'input, str>, Error> {
if is_normalization_required(&text) {
buffer.clear();
_normalize_attribute(input, text, entities, loop_detector, buffer)?;
Ok(Cow::Owned(buffer.to_str().to_owned()))
} else {
Ok(Cow::Borrowed(text.as_str()))
}
}
#[inline]
fn is_normalization_required(text: &StrSpan) -> bool {
fn check(c: u8) -> bool {
match c {
b'&'
| b'\t'
| b'\n'
| b'\r' => true,
_ => false,
}
}
text.as_str().bytes().any(check)
}
fn _normalize_attribute(
input: &str,
text: StrSpan,
entities: &[Entity],
loop_detector: &mut LoopDetector,
buffer: &mut TextBuffer,
) -> Result<(), Error> {
let mut s = Stream::from_substr(input, text.range());
while !s.at_end() {
let c = s.curr_byte_unchecked();
if c != b'&' {
s.advance(1);
buffer.push_from_attr(c, s.curr_byte().ok());
continue;
}
let start = s.pos();
match s.try_consume_reference() {
Some(Reference::Char(ch)) => {
for b in CharToBytes::new(ch) {
if loop_detector.depth > 0 {
if b == b'<' {
return Err(Error::InvalidAttributeValue(s.gen_text_pos_from(start)));
}
buffer.push_from_attr(b, None);
} else {
buffer.push_raw(b);
}
}
}
Some(Reference::Entity(name)) => {
match entities.iter().find(|e| e.name == name) {
Some(entity) => {
loop_detector.inc_references(&s)?;
loop_detector.inc_depth(&s)?;
_normalize_attribute(input, entity.value, entities, loop_detector, buffer)?;
loop_detector.dec_depth();
}
None => {
let pos = s.gen_text_pos_from(start);
return Err(Error::UnknownEntityReference(name.into(), pos));
}
}
}
None => {
let pos = s.gen_text_pos_from(start);
return Err(Error::MalformedEntityReference(pos));
}
}
}
Ok(())
}
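// Resolves a namespace prefix against the element's namespace range. An
// empty prefix maps to the default namespace (or `None` when no default
// namespace is declared); an unknown non-empty prefix is an error.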
fn get_ns_by_prefix<'input>(
doc: &Document<'input>,
range: ShortRange,
prefix: StrSpan,
) -> Result<Option<Cow<'input, str>>, Error> {
let prefix_opt = if prefix.is_empty() { None } else { Some(prefix.as_str()) };
let uri = doc.namespaces[range.to_urange()].iter()
.find(|ns| ns.name == prefix_opt)
.map(|ns| ns.uri.clone());
match uri {
Some(v) => Ok(Some(v)),
None => {
if !prefix.is_empty() {
let pos = err_pos_from_span(doc.text, prefix);
Err(Error::UnknownNamespace(prefix.as_str().to_string(), pos))
} else {
Ok(None)
}
}
}
}
#[inline]
fn gen_qname_string(prefix: &str, local: &str) -> String {
if prefix.is_empty() {
local.to_string()
} else {
format!("{}:{}", prefix, local)
}
}
#[inline]
fn err_pos_from_span(input: &str, text: StrSpan) -> TextPos {
Stream::from_substr(input, text.range()).gen_text_pos()
}
#[inline]
fn err_pos_from_qname(input: &str, prefix: StrSpan, local: StrSpan) -> TextPos {
let err_span = if prefix.is_empty() { local } else { prefix };
err_pos_from_span(input, err_span)
}
mod internals {
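// Iterates over the UTF-8 bytes of a single char. The buffer is pre-filled
// with 0xFF, which never occurs in UTF-8, so it doubles as an end marker.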
pub struct CharToBytes {
buf: [u8; 4],
idx: u8,
}
impl CharToBytes {
#[inline]
pub fn new(c: char) -> Self {
let mut buf = [0xFF; 4];
c.encode_utf8(&mut buf);
CharToBytes {
buf,
idx: 0,
}
}
}
impl Iterator for CharToBytes {
type Item = u8;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.idx < 4 {
let b = self.buf[self.idx as usize];
if b != 0xFF {
self.idx += 1;
return Some(b);
} else {
self.idx = 4;
}
}
None
}
}
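// A byte buffer used to build normalized text and attribute values.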
pub struct TextBuffer {
buf: Vec<u8>,
}
impl TextBuffer {
#[inline]
pub fn new() -> Self {
TextBuffer {
buf: Vec::with_capacity(32),
}
}
#[inline]
pub fn push_raw(&mut self, c: u8) {
self.buf.push(c);
}
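// Attribute-value normalization: a '\r' that is part of a "\r\n" pair is
// skipped (the following '\n' becomes the single space), and tab, newline,
// and carriage return are each replaced with a space.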
pub fn push_from_attr(&mut self, mut c: u8, c2: Option<u8>) {
if c == b'\r' && c2 == Some(b'\n') {
return;
}
c = match c {
b'\n' | b'\r' | b'\t' => b' ',
_ => c,
};
self.buf.push(c);
}
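// Text-node newline normalization: "\r\n" and a lone '\r' both collapse
// into a single '\n'.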
pub fn push_from_text(&mut self, c: u8, at_end: bool) {
if self.buf.last() == Some(&b'\r') {
let idx = self.buf.len() - 1;
self.buf[idx] = b'\n';
if at_end && c == b'\r' {
self.buf.push(b'\n');
} else if c != b'\n' {
self.buf.push(c);
}
} else if at_end && c == b'\r' {
self.buf.push(b'\n');
} else {
self.buf.push(c);
}
}
#[inline]
pub fn clear(&mut self) {
self.buf.clear();
}
#[inline]
pub fn is_empty(&self) -> bool {
self.buf.is_empty()
}
#[inline]
pub fn to_str(&self) -> &str {
std::str::from_utf8(&self.buf).unwrap()
}
}
}
use self::internals::*;