gix_ref/store/file/log/
iter.rs

1use gix_object::bstr::ByteSlice;
2
3use crate::{
4    file,
5    file::loose::reference::logiter::must_be_io_err,
6    store_impl::file::{log, log::iter::decode::LineNumber},
7    FullNameRef,
8};
9
10///
11pub mod decode {
12    use crate::store_impl::file::log;
13
14    /// The error returned by items in the [forward][super::forward()] and [reverse][super::reverse()] iterators
15    #[derive(Debug)]
16    pub struct Error {
17        inner: log::line::decode::Error,
18        line: LineNumber,
19    }
20
21    impl std::fmt::Display for Error {
22        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
23            write!(f, "In line {}: {}", self.line, self.inner)
24        }
25    }
26
27    impl std::error::Error for Error {}
28
29    impl Error {
30        pub(crate) fn new(err: log::line::decode::Error, line: LineNumber) -> Self {
31            Error { line, inner: err }
32        }
33    }
34
35    #[derive(Debug)]
36    pub(crate) enum LineNumber {
37        FromStart(usize),
38        FromEnd(usize),
39    }
40
41    impl std::fmt::Display for LineNumber {
42        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
43            let (line, suffix) = match self {
44                LineNumber::FromStart(line) => (line, ""),
45                LineNumber::FromEnd(line) => (line, " from the end"),
46            };
47            write!(f, "{}{}", line + 1, suffix)
48        }
49    }
50}
51
52/// Returns a forward iterator over the given `lines`, starting from the first line in the file and ending at the last.
53///
54/// Note that `lines` are an entire reflog file.
55///
56/// This iterator is useful when the ref log file is going to be rewritten which forces processing of the entire file.
57/// It will continue parsing even if individual log entries failed to parse, leaving it to the driver to decide whether to
58/// abort or continue.
59pub fn forward(lines: &[u8]) -> Forward<'_> {
60    Forward {
61        inner: lines.as_bstr().lines().enumerate(),
62    }
63}
64
/// An iterator yielding parsed lines in a file from start to end, oldest to newest.
///
/// Each item is either the decoded line or a decode error that carries the line's position.
pub struct Forward<'a> {
    /// The lines of the log buffer, each paired with its zero-based index.
    inner: std::iter::Enumerate<gix_object::bstr::Lines<'a>>,
}
69
70impl<'a> Iterator for Forward<'a> {
71    type Item = Result<log::LineRef<'a>, decode::Error>;
72
73    fn next(&mut self) -> Option<Self::Item> {
74        self.inner.next().map(|(ln, line)| {
75            log::LineRef::from_bytes(line).map_err(|err| decode::Error::new(err, decode::LineNumber::FromStart(ln)))
76        })
77    }
78}
79
/// A platform to store a buffer to hold ref log lines for iteration.
///
/// The iterators obtained from it borrow `buf`, which is why the buffer is kept here and reused across calls.
#[must_use = "Iterators should be obtained from this platform"]
pub struct Platform<'a, 's> {
    /// The store containing the reflogs
    pub store: &'s file::Store,
    /// The full name of the reference whose reflog to retrieve.
    pub name: &'a FullNameRef,
    /// A reusable buffer for storing log lines read from disk.
    pub buf: Vec<u8>,
}
90
91impl Platform<'_, '_> {
92    /// Return a forward iterator over all log-lines, most recent to oldest.
93    pub fn rev(&mut self) -> std::io::Result<Option<log::iter::Reverse<'_, std::fs::File>>> {
94        self.buf.clear();
95        self.buf.resize(1024 * 4, 0);
96        self.store
97            .reflog_iter_rev(self.name, &mut self.buf)
98            .map_err(must_be_io_err)
99    }
100
101    /// Return a forward iterator over all log-lines, oldest to most recent.
102    pub fn all(&mut self) -> std::io::Result<Option<log::iter::Forward<'_>>> {
103        self.buf.clear();
104        self.store.reflog_iter(self.name, &mut self.buf).map_err(must_be_io_err)
105    }
106}
107
/// An iterator yielding parsed lines in a file in reverse, most recent to oldest.
pub struct Reverse<'a, F> {
    /// The caller-provided sliding window that holds the part of the log currently being scanned.
    buf: &'a mut [u8],
    /// The number of lines yielded so far, used to report a line's position counted from the end.
    count: usize,
    /// The log reader along with the file offset at which the data currently held in `buf` starts.
    /// It starts out as the end-of-file offset and is `None` once iteration has finished or failed.
    read_and_pos: Option<(F, u64)>,
    /// The exclusive end of the next line to extract from `buf`, or `None` while no block was loaded yet.
    last_nl_pos: Option<usize>,
}
115
116/// An iterator over entries of the `log` file in reverse, using `buf` as sliding window.
117///
118/// Note that `buf` must be big enough to capture typical line length or else partial lines will be parsed and probably fail
119/// in the process.
120///
121/// This iterator is very expensive in terms of I/O operations and shouldn't be used to read more than the last few entries of the log.
122/// Use a forward iterator instead for these cases.
123///
124/// It will continue parsing even if individual log entries failed to parse, leaving it to the driver to decide whether to
125/// abort or continue.
126pub fn reverse<F>(mut log: F, buf: &mut [u8]) -> std::io::Result<Reverse<'_, F>>
127where
128    F: std::io::Read + std::io::Seek,
129{
130    let pos = log.seek(std::io::SeekFrom::End(0))?;
131    if buf.is_empty() {
132        return Err(std::io::Error::new(
133            std::io::ErrorKind::Other,
134            "Zero sized buffers are not allowed, use 256 bytes or more for typical logs",
135        ));
136    }
137    Ok(Reverse {
138        buf,
139        count: 0,
140        read_and_pos: Some((log, pos)),
141        last_nl_pos: None,
142    })
143}
144
/// Types related to the [`Reverse`][super::Reverse] iterator.
pub mod reverse {

    use super::decode;

    /// The error returned by the [`Reverse`][super::Reverse] iterator
    #[derive(Debug, thiserror::Error)]
    // The `#[error]` messages describe each variant.
    #[allow(missing_docs)]
    pub enum Error {
        // Seeking or reading the log file failed while loading another block into the buffer.
        #[error("The buffer could not be filled to make more lines available")]
        Io(#[from] std::io::Error),
        // A line was extracted from the buffer but could not be decoded.
        #[error("Could not decode log line")]
        Decode(#[from] decode::Error),
    }
}
160
161impl<F> Iterator for Reverse<'_, F>
162where
163    F: std::io::Read + std::io::Seek,
164{
165    type Item = Result<crate::log::Line, reverse::Error>;
166
167    fn next(&mut self) -> Option<Self::Item> {
168        match (self.last_nl_pos.take(), self.read_and_pos.take()) {
169            // Initial state - load first data block
170            (None, Some((mut read, pos))) => {
171                let npos = pos.saturating_sub(self.buf.len() as u64);
172                if let Err(err) = read.seek(std::io::SeekFrom::Start(npos)) {
173                    return Some(Err(err.into()));
174                }
175
176                let n = (pos - npos) as usize;
177                if n == 0 {
178                    return None;
179                }
180                let buf = &mut self.buf[..n];
181                if let Err(err) = read.read_exact(buf) {
182                    return Some(Err(err.into()));
183                };
184
185                let last_byte = *buf.last().expect("we have read non-zero bytes before");
186                self.last_nl_pos = Some(if last_byte != b'\n' { buf.len() } else { buf.len() - 1 });
187                self.read_and_pos = Some((read, npos));
188                self.next()
189            }
190            // Has data block and can extract lines from it, load new blocks as needed
191            (Some(end), Some(read_and_pos)) => match self.buf[..end].rfind_byte(b'\n') {
192                Some(start) => {
193                    self.read_and_pos = Some(read_and_pos);
194                    self.last_nl_pos = Some(start);
195                    let buf = &self.buf[start + 1..end];
196                    let res = Some(
197                        log::LineRef::from_bytes(buf)
198                            .map_err(|err| {
199                                reverse::Error::Decode(decode::Error::new(err, LineNumber::FromEnd(self.count)))
200                            })
201                            .map(Into::into),
202                    );
203                    self.count += 1;
204                    res
205                }
206                None => {
207                    let (mut read, last_read_pos) = read_and_pos;
208                    if last_read_pos == 0 {
209                        let buf = &self.buf[..end];
210                        Some(
211                            log::LineRef::from_bytes(buf)
212                                .map_err(|err| {
213                                    reverse::Error::Decode(decode::Error::new(err, LineNumber::FromEnd(self.count)))
214                                })
215                                .map(Into::into),
216                        )
217                    } else {
218                        let npos = last_read_pos.saturating_sub((self.buf.len() - end) as u64);
219                        if npos == last_read_pos {
220                            return Some(Err(std::io::Error::new(
221                                std::io::ErrorKind::Other,
222                                format!("buffer too small for line size, got until {:?}", self.buf.as_bstr()),
223                            )
224                            .into()));
225                        }
226                        let n = (last_read_pos - npos) as usize;
227                        self.buf.copy_within(0..end, n);
228                        if let Err(err) = read.seek(std::io::SeekFrom::Start(npos)) {
229                            return Some(Err(err.into()));
230                        }
231                        if let Err(err) = read.read_exact(&mut self.buf[..n]) {
232                            return Some(Err(err.into()));
233                        }
234                        self.read_and_pos = Some((read, npos));
235                        self.last_nl_pos = Some(n + end);
236                        self.next()
237                    }
238                }
239            },
240            // depleted
241            (None, None) => None,
242            (Some(_), None) => unreachable!("BUG: Invalid state: we never discard only our file, always both."),
243        }
244    }
245}