use std::borrow::Cow;
use bstr::{BStr, ByteSlice};
use winnow::{
combinator::{alt, delimited, opt, preceded, repeat},
error::{ErrorKind, InputError as NomError, ParserError as _},
prelude::*,
stream::{Offset as _, Stream as _},
token::{one_of, take_till, take_while},
};
use crate::parse::{error::ParseNode, section, Comment, Error, Event};
pub fn from_bytes<'i>(mut input: &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> Result<(), Error> {
let start = input.checkpoint();
let bom = unicode_bom::Bom::from(input);
input.next_slice(bom.len());
repeat(
0..,
alt((
comment.map(Event::Comment),
take_spaces1.map(|whitespace| Event::Whitespace(Cow::Borrowed(whitespace))),
|i: &mut &'i [u8]| {
let newline = take_newlines1.parse_next(i)?;
let o = Event::Newline(Cow::Borrowed(newline));
Ok(o)
},
)),
)
.fold(|| (), |_acc, event| dispatch(event))
.parse_next(&mut input)
.expect("many0(alt(...)) panicked. Likely a bug in one of the children parsers.");
if input.is_empty() {
return Ok(());
}
let mut node = ParseNode::SectionHeader;
let res = repeat(1.., |i: &mut &'i [u8]| section(i, &mut node, dispatch))
.map(|()| ())
.parse_next(&mut input);
res.map_err(|_| {
let newlines = newlines_from(input, start);
Error {
line_number: newlines,
last_attempted_parser: node,
parsed_until: input.as_bstr().into(),
}
})?;
if !input.is_empty() {
let newlines = newlines_from(input, start);
return Err(Error {
line_number: newlines,
last_attempted_parser: node,
parsed_until: input.as_bstr().into(),
});
}
Ok(())
}
/// Count the `\n` bytes that lie between the `start` checkpoint and the current position of `input`.
fn newlines_from(input: &[u8], start: winnow::stream::Checkpoint<&[u8], &[u8]>) -> usize {
    // Number of bytes consumed since `start` was taken.
    let consumed_len = input.offset_from(&start);

    // Rewind a copy of the stream to `start` and slice off exactly what was consumed.
    let mut rewound = input;
    rewound.reset(&start);
    let consumed = rewound.next_slice(consumed_len);

    consumed.iter().filter(|&&byte| byte == b'\n').count()
}
/// Parse a comment: a `;` or `#` marker followed by everything up to, but not including,
/// the next `\n` (or the end of input).
fn comment<'i>(i: &mut &'i [u8]) -> PResult<Comment<'i>, NomError<&'i [u8]>> {
    let tag = one_of([';', '#']).parse_next(i)?;
    // The text may be empty; the terminating newline is left in the input.
    let text = take_till(0.., |c| c == b'\n').parse_next(i)?;
    Ok(Comment {
        tag,
        text: Cow::Borrowed(text.as_bstr()),
    })
}
#[cfg(test)]
mod tests;
/// Parse one section: its header followed by any number of whitespace, newline,
/// key-value and comment items, dispatching an event for each of them.
///
/// If the header cannot be parsed, the input is rewound to where it was on entry and the
/// header's error is returned. `node` is updated by the key-value parser to reflect what
/// was attempted last.
fn section<'i>(
    i: &mut &'i [u8],
    node: &mut ParseNode,
    dispatch: &mut dyn FnMut(Event<'i>),
) -> PResult<(), NomError<&'i [u8]>> {
    let section_start = i.checkpoint();
    let header = match section_header(i) {
        Ok(header) => header,
        Err(err) => {
            i.reset(&section_start);
            return Err(err);
        }
    };
    dispatch(Event::SectionHeader(header));

    // Each round tries every kind of item once, in a fixed order; the section
    // ends with the first round that consumes no input at all.
    loop {
        let round_start = i.checkpoint();

        if let Some(spaces) = opt(take_spaces1).parse_next(i)? {
            dispatch(Event::Whitespace(Cow::Borrowed(spaces)));
        }

        if let Some(newlines) = opt(take_newlines1).parse_next(i)? {
            dispatch(Event::Newline(Cow::Borrowed(newlines)));
        }

        key_value_pair(i, node, dispatch)?;

        if let Some(trailing_comment) = opt(comment).parse_next(i)? {
            dispatch(Event::Comment(trailing_comment));
        }

        let consumed_nothing = i.offset_from(&round_start) == 0;
        if consumed_nothing {
            return Ok(());
        }
    }
}
fn section_header<'i>(i: &mut &'i [u8]) -> PResult<section::Header<'i>, NomError<&'i [u8]>> {
let name = preceded('[', take_while(1.., is_section_char).map(bstr::ByteSlice::as_bstr)).parse_next(i)?;
if opt(one_of::<_, _, NomError<&[u8]>>(']')).parse_next(i)?.is_some() {
let header = match memchr::memrchr(b'.', name.as_bytes()) {
Some(index) => section::Header {
name: section::Name(Cow::Borrowed(name[..index].as_bstr())),
separator: name.get(index..=index).map(|s| Cow::Borrowed(s.as_bstr())),
subsection_name: name.get(index + 1..).map(|s| Cow::Borrowed(s.as_bstr())),
},
None => section::Header {
name: section::Name(Cow::Borrowed(name.as_bstr())),
separator: None,
subsection_name: None,
},
};
if header.name.is_empty() {
return Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Fail));
}
return Ok(header);
}
(take_spaces1, delimited('"', opt(sub_section), "\"]"))
.map(|(whitespace, subsection_name)| section::Header {
name: section::Name(Cow::Borrowed(name)),
separator: Some(Cow::Borrowed(whitespace)),
subsection_name,
})
.parse_next(i)
}
/// Returns `true` for bytes that may appear in a section name between `[` and `]`:
/// ASCII letters, ASCII digits, `-` and `.`.
fn is_section_char(c: u8) -> bool {
    matches!(c, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-' | b'.')
}
fn sub_section<'i>(i: &mut &'i [u8]) -> PResult<Cow<'i, BStr>, NomError<&'i [u8]>> {
let mut output = Cow::Borrowed(Default::default());
if let Some(sub) = opt(subsection_subset).parse_next(i)? {
output = Cow::Borrowed(sub.as_bstr());
}
while let Some(sub) = opt(subsection_subset).parse_next(i)? {
output.to_mut().extend(sub);
}
Ok(output)
}
/// Parse one piece of a subsection name: either a run of unescaped bytes or a single escaped byte.
fn subsection_subset<'i>(i: &mut &'i [u8]) -> PResult<&'i [u8], NomError<&'i [u8]>> {
    // The unescaped run is tried first; the escape sequence is the fallback.
    let mut piece = alt((subsection_unescaped, subsection_escaped_char));
    piece.parse_next(i)
}
fn subsection_unescaped<'i>(i: &mut &'i [u8]) -> PResult<&'i [u8], NomError<&'i [u8]>> {
take_while(1.., is_subsection_unescaped_char).parse_next(i)
}
/// Parse a backslash followed by one escapable byte, returning only that byte as a
/// one-element slice; the backslash itself is consumed but not part of the result.
fn subsection_escaped_char<'i>(i: &mut &'i [u8]) -> PResult<&'i [u8], NomError<&'i [u8]>> {
    let escaped_byte = one_of(is_subsection_escapable_char).recognize();
    preceded('\\', escaped_byte).parse_next(i)
}
/// Returns `true` for every byte that may follow a backslash in a subsection name,
/// which is any byte except a line feed.
fn is_subsection_escapable_char(c: u8) -> bool {
    !matches!(c, b'\n')
}
/// Returns `true` for bytes that can appear verbatim in a quoted subsection name:
/// anything but `"`, `\`, a line feed or a NUL byte.
fn is_subsection_unescaped_char(c: u8) -> bool {
    !matches!(c, b'"' | b'\\' | b'\n' | 0)
}
/// Parse an optional `name [= value]` pair and dispatch its events.
///
/// `node` is set to [`ParseNode::Name`] on entry and to [`ParseNode::Value`] right before
/// the value is parsed, so that a failure can be attributed to the right part.
/// If no name is present, nothing is consumed or dispatched.
fn key_value_pair<'i>(
    i: &mut &'i [u8],
    node: &mut ParseNode,
    dispatch: &mut dyn FnMut(Event<'i>),
) -> PResult<(), NomError<&'i [u8]>> {
    *node = ParseNode::Name;
    let Some(name) = opt(config_name).parse_next(i)? else {
        return Ok(());
    };
    dispatch(Event::SectionValueName(section::ValueName(Cow::Borrowed(name))));

    // Whitespace between the name and what follows it gets an event of its own.
    if let Some(spaces) = opt(take_spaces1).parse_next(i)? {
        dispatch(Event::Whitespace(Cow::Borrowed(spaces)));
    }

    *node = ParseNode::Value;
    config_value(i, dispatch)
}
/// Parse the name of a key-value pair: one ASCII letter followed by any number of
/// ASCII letters, ASCII digits or `-`. Leading whitespace is not skipped.
fn config_name<'i>(i: &mut &'i [u8]) -> PResult<&'i BStr, NomError<&'i [u8]>> {
    let first_letter = one_of(|c: u8| c.is_ascii_alphabetic());
    let remainder = take_while(0.., |c: u8| c == b'-' || c.is_ascii_alphanumeric());

    // `recognize` yields the input covered by both parts as a single slice.
    (first_letter, remainder)
        .recognize()
        .map(bstr::ByteSlice::as_bstr)
        .parse_next(i)
}
/// Parse what follows the name of a key-value pair and dispatch the resulting events.
///
/// Without a `=` nothing is consumed and a single empty [`Event::Value`] is dispatched.
/// With a `=`, [`Event::KeyValueSeparator`] is dispatched, followed by an
/// [`Event::Whitespace`] if spaces follow, and then the events of the value itself.
fn config_value<'i>(i: &mut &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> PResult<(), NomError<&'i [u8]>> {
    let has_separator = opt('=').parse_next(i)?.is_some();
    if !has_separator {
        // A name without separator is reported with an empty value.
        dispatch(Event::Value(Cow::Borrowed("".into())));
        return Ok(());
    }

    dispatch(Event::KeyValueSeparator);
    if let Some(spaces) = opt(take_spaces1).parse_next(i)? {
        dispatch(Event::Whitespace(Cow::Borrowed(spaces)));
    }
    value_impl(i, dispatch)
}
/// Parse a value starting at the current position and dispatch its events.
///
/// A value that fits on one line is dispatched as a single `Event::Value`.
/// A value with line continuations (a backslash directly followed by `\n` or `\r\n`)
/// is dispatched as `Event::ValueNotDone` + `Event::Newline` for each continued line,
/// followed by a final `Event::ValueDone`.
///
/// The value ends at an unescaped `\n`, at a `;` or `#` outside of double quotes, or at
/// the end of input. Trailing ASCII whitespace is not part of the value, and neither it
/// nor the terminating byte is consumed.
///
/// On error the input is reset to where it was on entry. Errors are: a backslash at the
/// end of input, a backslash followed by `\r` that is not followed by `\n`, an escaped
/// byte other than `n`, `t`, `\`, `b`, `"` or a newline, and double quotes that are still
/// open when the value ends.
fn value_impl<'i>(i: &mut &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> PResult<(), NomError<&'i [u8]>> {
// Where to rewind to when the value turns out to be invalid.
let start_checkpoint = i.checkpoint();
// Start of the value segment currently being scanned; moves past each line continuation.
let mut value_start_checkpoint = i.checkpoint();
// Offset (relative to `value_start_checkpoint`) of the byte that terminated the value, if any.
let mut value_end = None;
// Comment markers inside double quotes do not end the value.
let mut is_in_quotes = false;
// Decides between `Value` and `ValueDone` for the final event.
let mut partial_value_found = false;
loop {
// Skip ahead to the next byte that needs special treatment.
let _ = take_while(0.., |c| !matches!(c, b'\n' | b'\\' | b'"' | b';' | b'#')).parse_next(i)?;
if let Some(c) = i.next_token() {
match c {
b'\n' => {
// `- 1` because the newline itself was just consumed and is not part of the value.
value_end = Some(i.offset_from(&value_start_checkpoint) - 1);
break;
}
b';' | b'#' if !is_in_quotes => {
// Start of a comment; as above, the marker is excluded from the value.
value_end = Some(i.offset_from(&value_start_checkpoint) - 1);
break;
}
b'\\' => {
// `escaped_index` is the offset of the byte after the backslash,
// `escape_index` the offset of the backslash itself.
let escaped_index = i.offset_from(&value_start_checkpoint);
let escape_index = escaped_index - 1;
let Some(mut c) = i.next_token() else {
// A backslash as the very last byte of the input.
i.reset(&start_checkpoint);
return Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Token));
};
// Number of bytes that make up the escaped newline: 1 for `\n`, 2 for `\r\n`.
let mut consumed = 1;
if c == b'\r' {
c = i.next_token().ok_or_else(|| {
i.reset(&start_checkpoint);
winnow::error::ErrMode::from_error_kind(i, ErrorKind::Token)
})?;
// An escaped `\r` is only valid as the first half of `\r\n`.
if c != b'\n' {
i.reset(&start_checkpoint);
return Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Slice));
}
consumed += 1;
}
match c {
b'\n' => {
// Line continuation: go back to the start of this segment and emit
// everything in front of the backslash as an unfinished value...
partial_value_found = true;
i.reset(&value_start_checkpoint);
let value = i.next_slice(escape_index).as_bstr();
dispatch(Event::ValueNotDone(Cow::Borrowed(value)));
// ...skip the backslash...
i.next_token();
// ...emit the newline bytes, and start a new segment right after them.
let nl = i.next_slice(consumed).as_bstr();
dispatch(Event::Newline(Cow::Borrowed(nl)));
value_start_checkpoint = i.checkpoint();
value_end = None;
}
// Valid escapes stay in the value as they are.
b'n' | b't' | b'\\' | b'b' | b'"' => {}
_ => {
i.reset(&start_checkpoint);
return Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Token));
}
}
}
b'"' => is_in_quotes = !is_in_quotes,
// Only reached by `;` or `#` inside of quotes, which are ordinary value bytes there.
_ => {}
}
} else {
// End of input.
break;
}
}
// Quotes have to be closed before the value ends, which includes the end of the line.
if is_in_quotes {
i.reset(&start_checkpoint);
return Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Slice));
}
let value_end = match value_end {
None => {
// No terminator was seen, so the segment extends to the end of input.
let last_value_index = i.offset_from(&value_start_checkpoint);
if last_value_index == 0 {
// NOTE(review): this emits `Value` rather than `ValueDone` even if a line continuation
// was seen before (`partial_value_found`) - confirm that this is intended.
dispatch(Event::Value(Cow::Borrowed("".into())));
return Ok(());
} else {
last_value_index
}
}
Some(idx) => idx,
};
// Rewind to the start of the segment and consume only the value without its trailing
// whitespace, leaving that whitespace and the terminator in the input.
i.reset(&value_start_checkpoint);
let value_end_no_trailing_whitespace = i[..value_end]
.iter()
.enumerate()
.rev()
.find_map(|(idx, b)| (!b.is_ascii_whitespace()).then_some(idx + 1))
.unwrap_or(0);
let remainder_value = i.next_slice(value_end_no_trailing_whitespace);
if partial_value_found {
dispatch(Event::ValueDone(Cow::Borrowed(remainder_value.as_bstr())));
} else {
dispatch(Event::Value(Cow::Borrowed(remainder_value.as_bstr())));
}
Ok(())
}
/// Parse a non-empty run of bytes for which `AsChar::is_space` holds.
fn take_spaces1<'i>(i: &mut &'i [u8]) -> PResult<&'i BStr, NomError<&'i [u8]>> {
    let spaces = take_while(1.., winnow::stream::AsChar::is_space).parse_next(i)?;
    Ok(bstr::ByteSlice::as_bstr(spaces))
}
/// Parse a run of line endings (`\r\n` or `\n`) and return it as one slice.
///
/// At least one line ending is required, and at most 1023 are taken at a time.
fn take_newlines1<'i>(i: &mut &'i [u8]) -> PResult<&'i BStr, NomError<&'i [u8]>> {
    let line_ending = alt(("\r\n", "\n"));
    // The individual matches are discarded; `recognize` returns the input they cover.
    let newlines = repeat(1..1024, line_ending)
        .map(|()| ())
        .recognize()
        .parse_next(i)?;
    Ok(bstr::ByteSlice::as_bstr(newlines))
}