1use std::borrow::Cow;
2
3use bstr::{BStr, ByteSlice};
4use winnow::{
5 combinator::{alt, delimited, opt, preceded, repeat},
6 error::{ErrorKind, InputError as NomError, ParserError as _},
7 prelude::*,
8 stream::{Offset as _, Stream as _},
9 token::{one_of, take_till, take_while},
10};
11
12use crate::parse::{error::ParseNode, section, Comment, Error, Event};
13
14pub fn from_bytes<'i>(mut input: &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> Result<(), Error> {
16 let start = input.checkpoint();
17
18 let bom = unicode_bom::Bom::from(input);
19 input.next_slice(bom.len());
20
21 repeat(
22 0..,
23 alt((
24 comment.map(Event::Comment),
25 take_spaces1.map(|whitespace| Event::Whitespace(Cow::Borrowed(whitespace))),
26 |i: &mut &'i [u8]| {
27 let newline = take_newlines1.parse_next(i)?;
28 let o = Event::Newline(Cow::Borrowed(newline));
29 Ok(o)
30 },
31 )),
32 )
33 .fold(|| (), |_acc, event| dispatch(event))
34 .parse_next(&mut input)
35 .expect("many0(alt(...)) panicked. Likely a bug in one of the children parsers.");
41
42 if input.is_empty() {
43 return Ok(());
44 }
45
46 let mut node = ParseNode::SectionHeader;
47
48 let res = repeat(1.., |i: &mut &'i [u8]| section(i, &mut node, dispatch))
49 .map(|()| ())
50 .parse_next(&mut input);
51 res.map_err(|_| {
52 let newlines = newlines_from(input, start);
53 Error {
54 line_number: newlines,
55 last_attempted_parser: node,
56 parsed_until: input.as_bstr().into(),
57 }
58 })?;
59
60 if !input.is_empty() {
63 let newlines = newlines_from(input, start);
64 return Err(Error {
65 line_number: newlines,
66 last_attempted_parser: node,
67 parsed_until: input.as_bstr().into(),
68 });
69 }
70
71 Ok(())
72}
73
74fn newlines_from(input: &[u8], start: winnow::stream::Checkpoint<&[u8], &[u8]>) -> usize {
75 let offset = input.offset_from(&start);
76 let mut start_input = input;
77 start_input.reset(&start);
78 start_input.next_slice(offset).iter().filter(|c| **c == b'\n').count()
79}
80
81fn comment<'i>(i: &mut &'i [u8]) -> PResult<Comment<'i>, NomError<&'i [u8]>> {
82 (
83 one_of([';', '#']),
84 take_till(0.., |c| c == b'\n').map(|text: &[u8]| Cow::Borrowed(text.as_bstr())),
85 )
86 .map(|(tag, text)| Comment { tag, text })
87 .parse_next(i)
88}
89
90#[cfg(test)]
91mod tests;
92
93fn section<'i>(
94 i: &mut &'i [u8],
95 node: &mut ParseNode,
96 dispatch: &mut dyn FnMut(Event<'i>),
97) -> PResult<(), NomError<&'i [u8]>> {
98 let start = i.checkpoint();
99 let header = section_header(i).map_err(|e| {
100 i.reset(&start);
101 e
102 })?;
103 dispatch(Event::SectionHeader(header));
104
105 loop {
108 let start = i.checkpoint();
109
110 if let Some(v) = opt(take_spaces1).parse_next(i)? {
111 dispatch(Event::Whitespace(Cow::Borrowed(v.as_bstr())));
112 }
113
114 if let Some(v) = opt(take_newlines1).parse_next(i)? {
115 dispatch(Event::Newline(Cow::Borrowed(v.as_bstr())));
116 }
117
118 key_value_pair(i, node, dispatch)?;
119
120 if let Some(comment) = opt(comment).parse_next(i)? {
121 dispatch(Event::Comment(comment));
122 }
123
124 if i.offset_from(&start) == 0 {
125 break;
126 }
127 }
128
129 Ok(())
130}
131
132fn section_header<'i>(i: &mut &'i [u8]) -> PResult<section::Header<'i>, NomError<&'i [u8]>> {
133 let name = preceded('[', take_while(1.., is_section_char).map(bstr::ByteSlice::as_bstr)).parse_next(i)?;
135
136 if opt(one_of::<_, _, NomError<&[u8]>>(']')).parse_next(i)?.is_some() {
137 let header = match memchr::memrchr(b'.', name.as_bytes()) {
140 Some(index) => section::Header {
141 name: section::Name(Cow::Borrowed(name[..index].as_bstr())),
142 separator: name.get(index..=index).map(|s| Cow::Borrowed(s.as_bstr())),
143 subsection_name: name.get(index + 1..).map(|s| Cow::Borrowed(s.as_bstr())),
144 },
145 None => section::Header {
146 name: section::Name(Cow::Borrowed(name.as_bstr())),
147 separator: None,
148 subsection_name: None,
149 },
150 };
151
152 if header.name.is_empty() {
153 return Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Fail));
154 }
155 return Ok(header);
156 }
157
158 (take_spaces1, delimited('"', opt(sub_section), "\"]"))
160 .map(|(whitespace, subsection_name)| section::Header {
161 name: section::Name(Cow::Borrowed(name)),
162 separator: Some(Cow::Borrowed(whitespace)),
163 subsection_name,
164 })
165 .parse_next(i)
166}
167
/// Return `true` if `c` may appear in the name part of a section header: an ASCII letter or
/// digit, `-` or `.`.
fn is_section_char(c: u8) -> bool {
    matches!(c, b'-' | b'.') || c.is_ascii_alphanumeric()
}
171
172fn sub_section<'i>(i: &mut &'i [u8]) -> PResult<Cow<'i, BStr>, NomError<&'i [u8]>> {
173 let mut output = Cow::Borrowed(Default::default());
174 if let Some(sub) = opt(subsection_subset).parse_next(i)? {
175 output = Cow::Borrowed(sub.as_bstr());
176 }
177 while let Some(sub) = opt(subsection_subset).parse_next(i)? {
178 output.to_mut().extend(sub);
179 }
180
181 Ok(output)
182}
183
184fn subsection_subset<'i>(i: &mut &'i [u8]) -> PResult<&'i [u8], NomError<&'i [u8]>> {
185 alt((subsection_unescaped, subsection_escaped_char)).parse_next(i)
186}
187
188fn subsection_unescaped<'i>(i: &mut &'i [u8]) -> PResult<&'i [u8], NomError<&'i [u8]>> {
189 take_while(1.., is_subsection_unescaped_char).parse_next(i)
190}
191
192fn subsection_escaped_char<'i>(i: &mut &'i [u8]) -> PResult<&'i [u8], NomError<&'i [u8]>> {
193 preceded('\\', one_of(is_subsection_escapable_char).take()).parse_next(i)
194}
195
/// Return `true` if `c` may follow a backslash inside a subsection name, which is the case for
/// every byte except `\n`.
fn is_subsection_escapable_char(c: u8) -> bool {
    !matches!(c, b'\n')
}
199
/// Return `true` if `c` may appear in a subsection name without being escaped, which excludes
/// `"`, `\`, `\n` and the NUL byte.
fn is_subsection_unescaped_char(c: u8) -> bool {
    !matches!(c, b'"' | b'\\' | b'\n' | 0)
}
203
204fn key_value_pair<'i>(
205 i: &mut &'i [u8],
206 node: &mut ParseNode,
207 dispatch: &mut dyn FnMut(Event<'i>),
208) -> PResult<(), NomError<&'i [u8]>> {
209 *node = ParseNode::Name;
210 if let Some(name) = opt(config_name).parse_next(i)? {
211 dispatch(Event::SectionValueName(section::ValueName(Cow::Borrowed(name))));
212
213 if let Some(whitespace) = opt(take_spaces1).parse_next(i)? {
214 dispatch(Event::Whitespace(Cow::Borrowed(whitespace)));
215 }
216
217 *node = ParseNode::Value;
218 config_value(i, dispatch)
219 } else {
220 Ok(())
221 }
222}
223
224fn config_name<'i>(i: &mut &'i [u8]) -> PResult<&'i BStr, NomError<&'i [u8]>> {
227 (
228 one_of(|c: u8| c.is_ascii_alphabetic()),
229 take_while(0.., |c: u8| c.is_ascii_alphanumeric() || c == b'-'),
230 )
231 .take()
232 .map(bstr::ByteSlice::as_bstr)
233 .parse_next(i)
234}
235
236fn config_value<'i>(i: &mut &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> PResult<(), NomError<&'i [u8]>> {
237 if opt('=').parse_next(i)?.is_some() {
238 dispatch(Event::KeyValueSeparator);
239 if let Some(whitespace) = opt(take_spaces1).parse_next(i)? {
240 dispatch(Event::Whitespace(Cow::Borrowed(whitespace)));
241 }
242 value_impl(i, dispatch)
243 } else {
244 dispatch(Event::Value(Cow::Borrowed("".into())));
248 Ok(())
249 }
250}
251
252fn value_impl<'i>(i: &mut &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> PResult<(), NomError<&'i [u8]>> {
255 let start_checkpoint = i.checkpoint();
256 let mut value_start_checkpoint = i.checkpoint();
257 let mut value_end = None;
258
259 let mut is_in_quotes = false;
261 let mut partial_value_found = false;
263
264 loop {
265 let _ = take_while(0.., |c| !matches!(c, b'\n' | b'\\' | b'"' | b';' | b'#')).parse_next(i)?;
266 if let Some(c) = i.next_token() {
267 match c {
268 b'\n' => {
269 value_end = Some(i.offset_from(&value_start_checkpoint) - 1);
270 break;
271 }
272 b';' | b'#' if !is_in_quotes => {
273 value_end = Some(i.offset_from(&value_start_checkpoint) - 1);
274 break;
275 }
276 b'\\' => {
277 let escaped_index = i.offset_from(&value_start_checkpoint);
278 let escape_index = escaped_index - 1;
279 let Some(mut c) = i.next_token() else {
280 i.reset(&start_checkpoint);
281 return Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Token));
282 };
283 let mut consumed = 1;
284 if c == b'\r' {
285 c = i.next_token().ok_or_else(|| {
286 i.reset(&start_checkpoint);
287 winnow::error::ErrMode::from_error_kind(i, ErrorKind::Token)
288 })?;
289 if c != b'\n' {
290 i.reset(&start_checkpoint);
291 return Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Slice));
292 }
293 consumed += 1;
294 }
295
296 match c {
297 b'\n' => {
298 partial_value_found = true;
299
300 i.reset(&value_start_checkpoint);
301
302 let value = i.next_slice(escape_index).as_bstr();
303 dispatch(Event::ValueNotDone(Cow::Borrowed(value)));
304
305 i.next_token();
306
307 let nl = i.next_slice(consumed).as_bstr();
308 dispatch(Event::Newline(Cow::Borrowed(nl)));
309
310 value_start_checkpoint = i.checkpoint();
311 value_end = None;
312 }
313 b'n' | b't' | b'\\' | b'b' | b'"' => {}
314 _ => {
315 i.reset(&start_checkpoint);
316 return Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Token));
317 }
318 }
319 }
320 b'"' => is_in_quotes = !is_in_quotes,
321 _ => {}
322 }
323 } else {
324 break;
325 }
326 }
327 if is_in_quotes {
328 i.reset(&start_checkpoint);
329 return Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Slice));
330 }
331
332 let value_end = match value_end {
333 None => {
334 let last_value_index = i.offset_from(&value_start_checkpoint);
335 if last_value_index == 0 {
336 dispatch(Event::Value(Cow::Borrowed("".into())));
337 return Ok(());
338 } else {
339 last_value_index
340 }
341 }
342 Some(idx) => idx,
343 };
344
345 i.reset(&value_start_checkpoint);
346 let value_end_no_trailing_whitespace = i[..value_end]
347 .iter()
348 .enumerate()
349 .rev()
350 .find_map(|(idx, b)| (!b.is_ascii_whitespace()).then_some(idx + 1))
351 .unwrap_or(0);
352 let remainder_value = i.next_slice(value_end_no_trailing_whitespace);
353
354 if partial_value_found {
355 dispatch(Event::ValueDone(Cow::Borrowed(remainder_value.as_bstr())));
356 } else {
357 dispatch(Event::Value(Cow::Borrowed(remainder_value.as_bstr())));
358 }
359
360 Ok(())
361}
362
363fn take_spaces1<'i>(i: &mut &'i [u8]) -> PResult<&'i BStr, NomError<&'i [u8]>> {
364 take_while(1.., winnow::stream::AsChar::is_space)
365 .map(bstr::ByteSlice::as_bstr)
366 .parse_next(i)
367}
368
369fn take_newlines1<'i>(i: &mut &'i [u8]) -> PResult<&'i BStr, NomError<&'i [u8]>> {
370 repeat(1..1024, alt(("\r\n", "\n")))
371 .map(|()| ())
372 .take()
373 .map(bstr::ByteSlice::as_bstr)
374 .parse_next(i)
375}