gix_config/parse/
events.rs

1use smallvec::SmallVec;
2
3use crate::{
4    parse,
5    parse::{Event, Section},
6};
7
/// A type to store, without allocation, all events that typically precede the first section.
9pub type FrontMatterEvents<'a> = SmallVec<[Event<'a>; 8]>;
10
11/// A zero-copy `git-config` file parser.
12///
/// This parser exposes low-level syntactic events from a `git-config` file.
14/// Generally speaking, you'll want to use [`File`] as it wraps
15/// around the parser to provide a higher-level abstraction to a `git-config`
16/// file, including querying, modifying, and updating values.
17///
18/// This parser guarantees that the events emitted are sufficient to
19/// reconstruct a `git-config` file identical to the source `git-config`
20/// when writing it.
21///
/// # Differences from an `.ini` parser
23///
24/// While the `git-config` format closely resembles the [`.ini` file format],
25/// there are subtle differences that make them incompatible. For one, the file
26/// format is not well defined, and there exists no formal specification to
27/// adhere to.
28///
29/// For concrete examples, some notable differences are:
/// - `git-config` sections permit subsections via either a quoted string
///   (`[some-section "subsection"]`) or via the deprecated dot notation
///   (`[some-section.subsection]`). Successfully parsing these section names is
///   not well defined in typical `.ini` parsers. This parser will handle these
///   cases perfectly.
35/// - Comment markers are not strictly defined either. This parser will always
36///   and only handle a semicolon or octothorpe (also known as a hash or number
37///   sign).
/// - Global properties may be allowed in `.ini` parsers, but are strictly
///   disallowed by this parser.
/// - Only `\t`, `\n`, `\b` and `\\` are valid escape characters.
41/// - Quoted and semi-quoted values will be parsed (but quotes will be included
42///   in event outputs). An example of a semi-quoted value is `5"hello world"`,
43///   which should be interpreted as `5hello world` after
44///   [normalization][crate::value::normalize()].
/// - Line continuations via a `\` character are supported (inside or outside of quotes).
/// - Whitespace handling similarly follows the `git-config` specification as
///   closely as possible, where excess whitespace after a non-quoted value is
///   trimmed, and line continuations onto a new line with excess spaces are kept.
49/// - Only equal signs (optionally padded by spaces) are valid name/value
50///   delimiters.
51///
52/// Note that things such as case-sensitivity or duplicate sections are
53/// _not_ handled. This parser is a low level _syntactic_ interpreter
54/// and higher level wrappers around this parser, which may
55/// or may not be zero-copy, should handle _semantic_ values. This also means
56/// that string-like values are not interpreted. For example, `hello"world"`
57/// would be read at a high level as `helloworld` but this parser will return
58/// the former instead, with the extra quotes. This is because it is not the
59/// responsibility of the parser to interpret these values, and doing so would
60/// necessarily require a copy, which this parser avoids.
61///
62/// # Trait Implementations
63///
/// - This struct does _not_ implement [`FromStr`] due to lifetime
///   constraints implied on the required `from_str` method. Instead, it provides
///   [`TryFrom<&'_ str>`](std::convert::TryFrom).
67///
68/// # Idioms
69///
70/// If you do want to use this parser, there are some idioms that may help you
71/// with interpreting sequences of events.
72///
73/// ## `Value` events do not immediately follow `Key` events
74///
75/// Consider the following `git-config` example:
76///
77/// ```text
78/// [core]
79///   autocrlf = input
80/// ```
81///
82/// Because this parser guarantees perfect reconstruction, there are many
83/// non-significant events that occur in addition to the ones you may expect:
84///
85/// ```
86/// # use gix_config::parse::{Event, Events, section};
87/// # use std::borrow::Cow;
88/// # use std::convert::TryFrom;
89/// # let section_header = section::Header::new("core", None).unwrap();
90/// # let section_data = "[core]\n  autocrlf = input";
91/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![
92/// Event::SectionHeader(section_header),
93/// Event::Newline(Cow::Borrowed("\n".into())),
94/// Event::Whitespace(Cow::Borrowed("  ".into())),
95/// Event::SectionValueName(section::ValueName::try_from("autocrlf")?),
96/// Event::Whitespace(Cow::Borrowed(" ".into())),
97/// Event::KeyValueSeparator,
98/// Event::Whitespace(Cow::Borrowed(" ".into())),
99/// Event::Value(Cow::Borrowed("input".into())),
100/// # ]);
101/// # Ok::<_, Box<dyn std::error::Error>>(())
102/// ```
103///
/// Note the two whitespace events between the key and value pair! Those two
/// events refer to the whitespace between the name and the equal sign, and
/// between the equal sign and the value. So if the config instead had
/// `autocrlf=input`, those whitespace events would no longer be present.
108///
109/// ## `KeyValueSeparator` event is not guaranteed to emit
110///
111/// Consider the following `git-config` example:
112///
113/// ```text
114/// [core]
115///   autocrlf
116/// ```
117///
/// This is a valid config with an `autocrlf` key having an implicit `true`
/// value. This means that there is no `=` separating the key and value,
/// which means that the corresponding event won't appear either:
121///
122/// ```
123/// # use gix_config::parse::{Event, Events, section};
124/// # use std::borrow::Cow;
125/// # use std::convert::TryFrom;
126/// # let section_header = section::Header::new("core", None).unwrap();
127/// # let section_data = "[core]\n  autocrlf";
128/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![
129/// Event::SectionHeader(section_header),
130/// Event::Newline(Cow::Borrowed("\n".into())),
131/// Event::Whitespace(Cow::Borrowed("  ".into())),
132/// Event::SectionValueName(section::ValueName::try_from("autocrlf")?),
133/// Event::Value(Cow::Borrowed("".into())),
134/// # ]);
135/// # Ok::<_, Box<dyn std::error::Error>>(())
136/// ```
137///
138/// ## Quoted values are not unquoted
139///
140/// Consider the following `git-config` example:
141///
142/// ```text
143/// [core]
144/// autocrlf=true""
145/// filemode=fa"lse"
146/// ```
147///
/// Both these values, when fully processed, should normally be `true` and
/// `false`. However, because this parser is zero-copy, we cannot process
/// partially quoted values, such as the `false` example. As a result, to
/// maintain consistency, the parser will just take all values as literals. The
/// relevant event stream is thus emitted as:
153///
154/// ```
155/// # use gix_config::parse::{Event, Events, section};
156/// # use std::borrow::Cow;
157/// # use std::convert::TryFrom;
158/// # let section_header = section::Header::new("core", None).unwrap();
159/// # let section_data = "[core]\nautocrlf=true\"\"\nfilemode=fa\"lse\"";
160/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![
161/// Event::SectionHeader(section_header),
162/// Event::Newline(Cow::Borrowed("\n".into())),
163/// Event::SectionValueName(section::ValueName::try_from("autocrlf")?),
164/// Event::KeyValueSeparator,
165/// Event::Value(Cow::Borrowed(r#"true"""#.into())),
166/// Event::Newline(Cow::Borrowed("\n".into())),
167/// Event::SectionValueName(section::ValueName::try_from("filemode")?),
168/// Event::KeyValueSeparator,
169/// Event::Value(Cow::Borrowed(r#"fa"lse""#.into())),
170/// # ]);
171/// # Ok::<_, Box<dyn std::error::Error>>(())
172/// ```
173///
/// ## Whitespace after line continuations is part of the value
175///
176/// Consider the following `git-config` example:
177///
178/// ```text
179/// [some-section]
180/// file=a\
181///     c
182/// ```
183///
/// Because of how `git-config` treats continuations, the whitespace preceding `c`
/// is in fact part of the value of `file`. The fully interpreted key/value
/// pair is actually `file=a    c`. As a result, the parser will provide this
/// split value accordingly:
188///
189/// ```
190/// # use gix_config::parse::{Event, Events, section};
191/// # use std::borrow::Cow;
192/// # use std::convert::TryFrom;
193/// # let section_header = section::Header::new("some-section", None).unwrap();
194/// # let section_data = "[some-section]\nfile=a\\\n    c";
195/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![
196/// Event::SectionHeader(section_header),
197/// Event::Newline(Cow::Borrowed("\n".into())),
198/// Event::SectionValueName(section::ValueName::try_from("file")?),
199/// Event::KeyValueSeparator,
200/// Event::ValueNotDone(Cow::Borrowed("a".into())),
201/// Event::Newline(Cow::Borrowed("\n".into())),
202/// Event::ValueDone(Cow::Borrowed("    c".into())),
203/// # ]);
204/// # Ok::<_, Box<dyn std::error::Error>>(())
205/// ```
206///
207/// [`File`]: crate::File
208/// [`.ini` file format]: https://en.wikipedia.org/wiki/INI_file
209/// [`git`'s documentation]: https://git-scm.com/docs/git-config#_configuration_file
210/// [`FromStr`]: std::str::FromStr
211/// [`From<&'_ str>`]: std::convert::From
212#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)]
213pub struct Events<'a> {
214    /// Events seen before the first section.
215    pub frontmatter: FrontMatterEvents<'a>,
216    /// All parsed sections.
217    pub sections: Vec<Section<'a>>,
218}
219
220impl Events<'static> {
221    /// Parses the provided bytes, returning an [`Events`] that contains allocated
222    /// and owned events. This is similar to [`Events::from_bytes()`], but performance
223    /// is degraded as it requires allocation for every event.
224    ///
225    /// Use `filter` to only include those events for which it returns true.
226    pub fn from_bytes_owned<'a>(
227        input: &'a [u8],
228        filter: Option<fn(&Event<'a>) -> bool>,
229    ) -> Result<Events<'static>, parse::Error> {
230        from_bytes(input, &|e| e.to_owned(), filter)
231    }
232}
233
234impl<'a> Events<'a> {
235    /// Attempt to zero-copy parse the provided bytes. On success, returns a
236    /// [`Events`] that provides methods to accessing leading comments and sections
237    /// of a `git-config` file and can be converted into an iterator of [`Event`]
238    /// for higher level processing.
239    ///
240    /// Use `filter` to only include those events for which it returns true.
241    pub fn from_bytes(input: &'a [u8], filter: Option<fn(&Event<'a>) -> bool>) -> Result<Events<'a>, parse::Error> {
242        from_bytes(input, &std::convert::identity, filter)
243    }
244
245    /// Attempt to zero-copy parse the provided `input` string.
246    ///
247    /// Prefer the [`from_bytes()`][Self::from_bytes()] method if UTF8 encoding
248    /// isn't guaranteed.
249    #[allow(clippy::should_implement_trait)]
250    pub fn from_str(input: &'a str) -> Result<Events<'a>, parse::Error> {
251        Self::from_bytes(input.as_bytes(), None)
252    }
253
254    /// Consumes the parser to produce an iterator of all contained events.
255    #[must_use = "iterators are lazy and do nothing unless consumed"]
256    #[allow(clippy::should_implement_trait)]
257    pub fn into_iter(self) -> impl std::iter::FusedIterator<Item = parse::Event<'a>> {
258        self.frontmatter.into_iter().chain(
259            self.sections
260                .into_iter()
261                .flat_map(|section| std::iter::once(parse::Event::SectionHeader(section.header)).chain(section.events)),
262        )
263    }
264
265    /// Place all contained events into a single `Vec`.
266    pub fn into_vec(self) -> Vec<parse::Event<'a>> {
267        self.into_iter().collect()
268    }
269}
270
271impl<'a> TryFrom<&'a str> for Events<'a> {
272    type Error = parse::Error;
273
274    fn try_from(value: &'a str) -> Result<Self, Self::Error> {
275        Self::from_str(value)
276    }
277}
278
279impl<'a> TryFrom<&'a [u8]> for Events<'a> {
280    type Error = parse::Error;
281
282    fn try_from(value: &'a [u8]) -> Result<Self, Self::Error> {
283        Events::from_bytes(value, None)
284    }
285}
286
287fn from_bytes<'a, 'b>(
288    input: &'a [u8],
289    convert: &dyn Fn(Event<'a>) -> Event<'b>,
290    filter: Option<fn(&Event<'a>) -> bool>,
291) -> Result<Events<'b>, parse::Error> {
292    let mut header = None;
293    let mut events = Vec::with_capacity(256);
294    let mut frontmatter = FrontMatterEvents::default();
295    let mut sections = Vec::new();
296    parse::from_bytes(input, &mut |e: Event<'_>| match e {
297        Event::SectionHeader(next_header) => {
298            match header.take() {
299                None => {
300                    frontmatter = std::mem::take(&mut events).into_iter().collect();
301                }
302                Some(prev_header) => {
303                    sections.push(parse::Section {
304                        header: prev_header,
305                        events: std::mem::take(&mut events),
306                    });
307                }
308            };
309            header = match convert(Event::SectionHeader(next_header)) {
310                Event::SectionHeader(h) => h,
311                _ => unreachable!("BUG: convert must not change the event type, just the lifetime"),
312            }
313            .into();
314        }
315        event => {
316            if filter.map_or(true, |f| f(&event)) {
317                events.push(convert(event));
318            }
319        }
320    })?;
321
322    match header {
323        None => {
324            frontmatter = events.into_iter().collect();
325        }
326        Some(prev_header) => {
327            sections.push(parse::Section {
328                header: prev_header,
329                events: std::mem::take(&mut events),
330            });
331        }
332    }
333    Ok(Events { frontmatter, sections })
334}