gix_object/commit/
ref_iter.rs

1use std::{borrow::Cow, ops::Range};
2
3use bstr::BStr;
4use gix_hash::{oid, ObjectId};
5use winnow::{
6    combinator::{alt, eof, opt, terminated},
7    error::StrContext,
8    prelude::*,
9    token::take_till,
10};
11
12use crate::{
13    bstr::ByteSlice,
14    commit::{decode, SignedData},
15    parse,
16    parse::NL,
17    CommitRefIter,
18};
19
/// Identifies which of the two signature header fields of a commit is expected next by the parser.
#[derive(Copy, Clone)]
pub(crate) enum SignatureKind {
    /// The `author` header field.
    Author,
    /// The `committer` header field.
    Committer,
}
25
/// The position of the parser within a commit, i.e. the kind of field that is parsed next.
///
/// The parser moves through these states in declaration order, starting at [`State::Tree`].
#[derive(Default, Copy, Clone)]
pub(crate) enum State {
    /// Expect the mandatory `tree` header field. This is the initial state.
    #[default]
    Tree,
    /// Expect zero or more `parent` header fields.
    Parents,
    /// Expect a mandatory signature header field.
    Signature {
        /// Whether the author or the committer signature is expected next.
        of: SignatureKind,
    },
    /// Expect the optional `encoding` header field.
    Encoding,
    /// Expect zero or more additional header fields of any name.
    ExtraHeaders,
    /// Expect the commit message, which has to span the remainder of the data.
    Message,
}
38
39/// Lifecycle
40impl<'a> CommitRefIter<'a> {
41    /// Create a commit iterator from data.
42    pub fn from_bytes(data: &'a [u8]) -> CommitRefIter<'a> {
43        CommitRefIter {
44            data,
45            state: State::default(),
46        }
47    }
48}
49
50/// Access
51impl<'a> CommitRefIter<'a> {
52    /// Parse `data` as commit and return its PGP signature, along with *all non-signature* data as [`SignedData`], or `None`
53    /// if the commit isn't signed.
54    ///
55    /// This allows the caller to validate the signature by passing the signed data along with the signature back to the program
56    /// that created it.
57    pub fn signature(data: &'a [u8]) -> Result<Option<(Cow<'a, BStr>, SignedData<'a>)>, crate::decode::Error> {
58        let mut signature_and_range = None;
59
60        let raw_tokens = CommitRefIterRaw {
61            data,
62            state: State::default(),
63            offset: 0,
64        };
65        for token in raw_tokens {
66            let token = token?;
67            if let Token::ExtraHeader((name, value)) = &token.token {
68                if *name == "gpgsig" {
69                    // keep track of the signature range alongside the signature data,
70                    // because all but the signature is the signed data.
71                    signature_and_range = Some((value.clone(), token.token_range));
72                    break;
73                }
74            }
75        }
76
77        Ok(signature_and_range.map(|(sig, signature_range)| (sig, SignedData { data, signature_range })))
78    }
79
80    /// Returns the object id of this commits tree if it is the first function called and if there is no error in decoding
81    /// the data.
82    ///
83    /// Note that this method must only be called once or else will always return None while consuming a single token.
84    /// Errors are coerced into options, hiding whether there was an error or not. The caller should assume an error if they
85    /// call the method as intended. Such a squelched error cannot be recovered unless the objects data is retrieved and parsed again.
86    /// `next()`.
87    pub fn tree_id(&mut self) -> Result<ObjectId, crate::decode::Error> {
88        let tree_id = self.next().ok_or_else(missing_field)??;
89        Token::try_into_id(tree_id).ok_or_else(missing_field)
90    }
91
92    /// Return all `parent_ids` as iterator.
93    ///
94    /// Parsing errors are ignored quietly.
95    pub fn parent_ids(self) -> impl Iterator<Item = gix_hash::ObjectId> + 'a {
96        self.filter_map(|t| match t {
97            Ok(Token::Parent { id }) => Some(id),
98            _ => None,
99        })
100    }
101
102    /// Returns all signatures, first the author, then the committer, if there is no decoding error.
103    ///
104    /// Errors are coerced into options, hiding whether there was an error or not. The caller knows if there was an error or not
105    /// if not exactly two signatures were iterable.
106    /// Errors are not the common case - if an error needs to be detectable, use this instance as iterator.
107    pub fn signatures(self) -> impl Iterator<Item = gix_actor::SignatureRef<'a>> + 'a {
108        self.filter_map(|t| match t {
109            Ok(Token::Author { signature } | Token::Committer { signature }) => Some(signature),
110            _ => None,
111        })
112    }
113
114    /// Returns the committer signature if there is no decoding error.
115    pub fn committer(mut self) -> Result<gix_actor::SignatureRef<'a>, crate::decode::Error> {
116        self.find_map(|t| match t {
117            Ok(Token::Committer { signature }) => Some(Ok(signature)),
118            Err(err) => Some(Err(err)),
119            _ => None,
120        })
121        .ok_or_else(missing_field)?
122    }
123
124    /// Returns the author signature if there is no decoding error.
125    ///
126    /// It may contain white space surrounding it, and is exactly as parsed.
127    pub fn author(mut self) -> Result<gix_actor::SignatureRef<'a>, crate::decode::Error> {
128        self.find_map(|t| match t {
129            Ok(Token::Author { signature }) => Some(Ok(signature)),
130            Err(err) => Some(Err(err)),
131            _ => None,
132        })
133        .ok_or_else(missing_field)?
134    }
135
136    /// Returns the message if there is no decoding error.
137    ///
138    /// It may contain white space surrounding it, and is exactly as
139    //  parsed.
140    pub fn message(mut self) -> Result<&'a BStr, crate::decode::Error> {
141        self.find_map(|t| match t {
142            Ok(Token::Message(msg)) => Some(Ok(msg)),
143            Err(err) => Some(Err(err)),
144            _ => None,
145        })
146        .transpose()
147        .map(Option::unwrap_or_default)
148    }
149}
150
151fn missing_field() -> crate::decode::Error {
152    crate::decode::empty_error()
153}
154
/// Parsing
impl<'a> CommitRefIter<'a> {
    /// Parse a single token from the start of `i` as determined by `state`, and return the unparsed remainder of `i`
    /// along with the token.
    ///
    /// `state` is advanced such that the next call parses the kind of token that is expected next.
    /// Parse errors are converted into [`crate::decode::Error`].
    #[inline]
    fn next_inner(mut i: &'a [u8], state: &mut State) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> {
        let input = &mut i;
        match Self::next_inner_(input, state) {
            // The parser advanced `input` past the token, so it now refers to the remaining data.
            Ok(token) => Ok((*input, token)),
            Err(err) => Err(crate::decode::Error::with_err(err, input)),
        }
    }

    /// Parse the token expected by `state` from `input`, advancing both `input` and `state`.
    ///
    /// If an optional field isn't present, `state` moves on to the next kind of field and parsing is retried
    /// right away by calling this function again, so a token or an error is always returned.
    fn next_inner_(
        input: &mut &'a [u8],
        state: &mut State,
    ) -> Result<Token<'a>, winnow::error::ErrMode<crate::decode::ParseError>> {
        use State::*;
        Ok(match state {
            Tree => {
                // The tree is mandatory, hence failing to parse it is an error.
                let tree = (|i: &mut _| parse::header_field(i, b"tree", parse::hex_hash))
                    .context(StrContext::Expected("tree <40 lowercase hex char>".into()))
                    .parse_next(input)?;
                *state = State::Parents;
                Token::Tree {
                    id: ObjectId::from_hex(tree).expect("parsing validation"),
                }
            }
            Parents => {
                // Parents are optional and repeatable: stay in this state for as long as one can be parsed.
                let parent = opt(|i: &mut _| parse::header_field(i, b"parent", parse::hex_hash))
                    .context(StrContext::Expected("commit <40 lowercase hex char>".into()))
                    .parse_next(input)?;
                match parent {
                    Some(parent) => Token::Parent {
                        id: ObjectId::from_hex(parent).expect("parsing validation"),
                    },
                    None => {
                        // No parent is left, continue with the author right away.
                        *state = State::Signature {
                            of: SignatureKind::Author,
                        };
                        Self::next_inner_(input, state)?
                    }
                }
            }
            Signature { ref mut of } => {
                // Remember whose signature is parsed now, as the state is advanced before parsing.
                let who = *of;
                let (field_name, err_msg) = match of {
                    SignatureKind::Author => {
                        // The author is followed by the committer.
                        *of = SignatureKind::Committer;
                        (&b"author"[..], "author <signature>")
                    }
                    SignatureKind::Committer => {
                        // The committer is followed by the optional encoding.
                        *state = State::Encoding;
                        (&b"committer"[..], "committer <signature>")
                    }
                };
                // Both signatures are mandatory, hence failing to parse one is an error.
                let signature = (|i: &mut _| parse::header_field(i, field_name, parse::signature))
                    .context(StrContext::Expected(err_msg.into()))
                    .parse_next(input)?;
                match who {
                    SignatureKind::Author => Token::Author { signature },
                    SignatureKind::Committer => Token::Committer { signature },
                }
            }
            Encoding => {
                // The encoding is optional and its value extends up to the newline.
                let encoding = opt(|i: &mut _| parse::header_field(i, b"encoding", take_till(1.., NL)))
                    .context(StrContext::Expected("encoding <encoding>".into()))
                    .parse_next(input)?;
                // There is at most one encoding, so move on no matter whether it was present.
                *state = State::ExtraHeaders;
                match encoding {
                    Some(encoding) => Token::Encoding(encoding.as_bstr()),
                    None => Self::next_inner_(input, state)?,
                }
            }
            ExtraHeaders => {
                // Extra headers are optional and repeatable. The multi-line form is tried first and yields an owned value,
                // whereas the single-line form borrows its value from the input.
                let extra_header = opt(alt((
                    |i: &mut _| parse::any_header_field_multi_line(i).map(|(k, o)| (k.as_bstr(), Cow::Owned(o))),
                    |i: &mut _| {
                        parse::any_header_field(i, take_till(1.., NL))
                            .map(|(k, o)| (k.as_bstr(), Cow::Borrowed(o.as_bstr())))
                    },
                )))
                .context(StrContext::Expected("<field> <single-line|multi-line>".into()))
                .parse_next(input)?;
                match extra_header {
                    Some(extra_header) => Token::ExtraHeader(extra_header),
                    None => {
                        // No header is left, continue with the message right away.
                        *state = State::Message;
                        Self::next_inner_(input, state)?
                    }
                }
            }
            Message => {
                // The message has to be followed by the end of the input.
                let message = terminated(decode::message, eof).parse_next(input)?;
                debug_assert!(
                    input.is_empty(),
                    "we should have consumed all data - otherwise iter may go forever"
                );
                Token::Message(message)
            }
        })
    }
}
255
256impl<'a> Iterator for CommitRefIter<'a> {
257    type Item = Result<Token<'a>, crate::decode::Error>;
258
259    fn next(&mut self) -> Option<Self::Item> {
260        if self.data.is_empty() {
261            return None;
262        }
263        match Self::next_inner(self.data, &mut self.state) {
264            Ok((data, token)) => {
265                self.data = data;
266                Some(Ok(token))
267            }
268            Err(err) => {
269                self.data = &[];
270                Some(Err(err))
271            }
272        }
273    }
274}
275
/// A variation of [`CommitRefIter`] that returns [`RawToken`]s instead.
struct CommitRefIterRaw<'a> {
    /// The data that still has to be parsed.
    data: &'a [u8],
    /// Determines which kind of token is parsed next.
    state: State,
    /// The amount of bytes consumed by all tokens yielded so far, which is where the range of the next token starts.
    offset: usize,
}
282
283impl<'a> Iterator for CommitRefIterRaw<'a> {
284    type Item = Result<RawToken<'a>, crate::decode::Error>;
285
286    fn next(&mut self) -> Option<Self::Item> {
287        if self.data.is_empty() {
288            return None;
289        }
290        match CommitRefIter::next_inner(self.data, &mut self.state) {
291            Ok((remaining, token)) => {
292                let consumed = self.data.len() - remaining.len();
293                let start = self.offset;
294                let end = start + consumed;
295                self.offset = end;
296
297                self.data = remaining;
298                Some(Ok(RawToken {
299                    token,
300                    token_range: start..end,
301                }))
302            }
303            Err(err) => {
304                self.data = &[];
305                Some(Err(err))
306            }
307        }
308    }
309}
310
/// A combination of a parsed [`Token`] as well as the range of bytes that were consumed to parse it.
struct RawToken<'a> {
    /// The parsed token.
    token: Token<'a>,
    /// The range of bytes that were consumed to parse `token`, as offsets counted from where the iterator
    /// producing this instance started parsing.
    token_range: Range<usize>,
}
317
/// A token returned by the [commit iterator][CommitRefIter].
#[allow(missing_docs)]
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
pub enum Token<'a> {
    /// The id of the tree of the commit, as parsed from the `tree` header field.
    Tree {
        id: ObjectId,
    },
    /// The id of a parent commit, as parsed from a `parent` header field.
    Parent {
        id: ObjectId,
    },
    /// A person who authored the content of the commit.
    Author {
        signature: gix_actor::SignatureRef<'a>,
    },
    /// A person who committed the authors work to the repository.
    Committer {
        signature: gix_actor::SignatureRef<'a>,
    },
    /// The value of the `encoding` header field.
    Encoding(&'a BStr),
    /// An additional header field as `(name, value)` pair, with the value being owned if the field spanned
    /// multiple lines, and borrowed otherwise.
    ExtraHeader((&'a BStr, Cow<'a, BStr>)),
    /// The commit message, which is everything that follows the header fields.
    Message(&'a BStr),
}
340
341impl Token<'_> {
342    /// Return the object id of this token if it's a [tree][Token::Tree] or a [parent commit][Token::Parent].
343    pub fn id(&self) -> Option<&oid> {
344        match self {
345            Token::Tree { id } | Token::Parent { id } => Some(id.as_ref()),
346            _ => None,
347        }
348    }
349
350    /// Return the owned object id of this token if it's a [tree][Token::Tree] or a [parent commit][Token::Parent].
351    pub fn try_into_id(self) -> Option<ObjectId> {
352        match self {
353            Token::Tree { id } | Token::Parent { id } => Some(id),
354            _ => None,
355        }
356    }
357}