gix_odb/store_impls/loose/
find.rs

1use std::{cmp::Ordering, collections::HashSet, fs, io::Read, path::PathBuf};
2
3use gix_features::zlib;
4
5use crate::store_impls::loose::{hash_path, Store, HEADER_MAX_SIZE};
6
7/// Returned by [`Store::try_find()`]
8#[derive(thiserror::Error, Debug)]
9#[allow(missing_docs)]
10pub enum Error {
11    #[error("decompression of loose object at '{path}' failed")]
12    DecompressFile {
13        source: zlib::inflate::Error,
14        path: PathBuf,
15    },
16    #[error("file at '{path}' showed invalid size of inflated data, expected {expected}, got {actual}")]
17    SizeMismatch { actual: u64, expected: u64, path: PathBuf },
18    #[error(transparent)]
19    Decode(#[from] gix_object::decode::LooseHeaderDecodeError),
20    #[error("Cannot store {size} in memory as it's not representable")]
21    OutOfMemory { size: u64 },
22    #[error("Could not {action} data at '{path}'")]
23    Io {
24        source: std::io::Error,
25        action: &'static str,
26        path: PathBuf,
27    },
28}
29
30/// Object lookup
31impl Store {
32    const OPEN_ACTION: &'static str = "open";
33
34    /// Returns true if the given id is contained in our repository.
35    pub fn contains(&self, id: &gix_hash::oid) -> bool {
36        debug_assert_eq!(self.object_hash, id.kind());
37        hash_path(id, self.path.clone()).is_file()
38    }
39
40    /// Given a `prefix`, find an object that matches it uniquely within this loose object
41    /// database as `Ok(Some(Ok(<oid>)))`.
42    /// If there is more than one object matching the object `Ok(Some(Err(()))` is returned.
43    ///
44    /// Finally, if no object matches, the return value is `Ok(None)`.
45    ///
46    /// The outer `Result` is to indicate errors during file system traversal.
47    ///
48    /// Pass `candidates` to obtain the set of all object ids matching `prefix`, with the same return value as
49    /// one would have received if it remained `None`.
50    pub fn lookup_prefix(
51        &self,
52        prefix: gix_hash::Prefix,
53        mut candidates: Option<&mut HashSet<gix_hash::ObjectId>>,
54    ) -> Result<Option<crate::store::prefix::lookup::Outcome>, crate::loose::iter::Error> {
55        let single_directory_iter = crate::loose::Iter {
56            inner: gix_features::fs::walkdir_new(
57                &self.path.join(prefix.as_oid().to_hex_with_len(2).to_string()),
58                gix_features::fs::walkdir::Parallelism::Serial,
59                false,
60            )
61            .min_depth(1)
62            .max_depth(1)
63            .follow_links(false)
64            .into_iter(),
65            hash_hex_len: prefix.as_oid().kind().len_in_hex(),
66        };
67        let mut candidate = None;
68        for oid in single_directory_iter {
69            let oid = match oid {
70                Ok(oid) => oid,
71                Err(err) => {
72                    return match err.io_error() {
73                        Some(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
74                        None | Some(_) => Err(err),
75                    }
76                }
77            };
78            if prefix.cmp_oid(&oid) == Ordering::Equal {
79                match &mut candidates {
80                    Some(candidates) => {
81                        candidates.insert(oid);
82                    }
83                    None => {
84                        if candidate.is_some() {
85                            return Ok(Some(Err(())));
86                        }
87                        candidate = Some(oid);
88                    }
89                }
90            }
91        }
92
93        match &mut candidates {
94            Some(candidates) => match candidates.len() {
95                0 => Ok(None),
96                1 => Ok(candidates.iter().next().copied().map(Ok)),
97                _ => Ok(Some(Err(()))),
98            },
99            None => Ok(candidate.map(Ok)),
100        }
101    }
102
103    /// Return the object identified by the given [`ObjectId`][gix_hash::ObjectId] if present in this database,
104    /// writing its raw data into the given `out` buffer.
105    ///
106    /// Returns `Err` if there was an error locating or reading the object. Returns `Ok<None>` if
107    /// there was no such object.
108    pub fn try_find<'a>(
109        &self,
110        id: &gix_hash::oid,
111        out: &'a mut Vec<u8>,
112    ) -> Result<Option<gix_object::Data<'a>>, Error> {
113        debug_assert_eq!(self.object_hash, id.kind());
114        match self.find_inner(id, out) {
115            Ok(obj) => Ok(Some(obj)),
116            Err(err) => match err {
117                Error::Io {
118                    source: err,
119                    action,
120                    path,
121                } => {
122                    if action == Self::OPEN_ACTION && err.kind() == std::io::ErrorKind::NotFound {
123                        Ok(None)
124                    } else {
125                        Err(Error::Io {
126                            source: err,
127                            action,
128                            path,
129                        })
130                    }
131                }
132                err => Err(err),
133            },
134        }
135    }
136
137    /// Return only the decompressed size of the object and its kind without fully reading it into memory as tuple of `(size, kind)`.
138    /// Returns `None` if `id` does not exist in the database.
139    pub fn try_header(&self, id: &gix_hash::oid) -> Result<Option<(u64, gix_object::Kind)>, Error> {
140        const BUF_SIZE: usize = 256;
141        let mut buf = [0_u8; BUF_SIZE];
142        let path = hash_path(id, self.path.clone());
143
144        let mut inflate = zlib::Inflate::default();
145        let mut istream = match fs::File::open(&path) {
146            Ok(f) => f,
147            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
148            Err(err) => {
149                return Err(Error::Io {
150                    source: err,
151                    action: Self::OPEN_ACTION,
152                    path,
153                })
154            }
155        };
156
157        let (compressed_buf, _) = buf.split_at_mut(BUF_SIZE - HEADER_MAX_SIZE);
158        let bytes_read = istream.read(compressed_buf).map_err(|e| Error::Io {
159            source: e,
160            action: "read",
161            path: path.to_owned(),
162        })?;
163        let (compressed_buf, header_buf) = buf.split_at_mut(bytes_read);
164        let (status, _consumed_in, consumed_out) =
165            inflate
166                .once(compressed_buf, header_buf)
167                .map_err(|e| Error::DecompressFile {
168                    source: e,
169                    path: path.to_owned(),
170                })?;
171
172        if status == zlib::Status::BufError {
173            return Err(Error::DecompressFile {
174                source: zlib::inflate::Error::Status(status),
175                path,
176            });
177        }
178        let (kind, size, _header_size) = gix_object::decode::loose_header(&header_buf[..consumed_out])?;
179        Ok(Some((size, kind)))
180    }
181
182    fn find_inner<'a>(&self, id: &gix_hash::oid, buf: &'a mut Vec<u8>) -> Result<gix_object::Data<'a>, Error> {
183        let path = hash_path(id, self.path.clone());
184
185        let mut inflate = zlib::Inflate::default();
186        let ((status, consumed_in, consumed_out), bytes_read) = {
187            let mut istream = fs::File::open(&path).map_err(|e| Error::Io {
188                source: e,
189                action: Self::OPEN_ACTION,
190                path: path.to_owned(),
191            })?;
192
193            buf.clear();
194            let bytes_read = istream.read_to_end(buf).map_err(|e| Error::Io {
195                source: e,
196                action: "read",
197                path: path.to_owned(),
198            })?;
199            buf.resize(bytes_read + HEADER_MAX_SIZE, 0);
200            let (input, output) = buf.split_at_mut(bytes_read);
201            (
202                inflate
203                    .once(&input[..bytes_read], output)
204                    .map_err(|e| Error::DecompressFile {
205                        source: e,
206                        path: path.to_owned(),
207                    })?,
208                bytes_read,
209            )
210        };
211        if status == zlib::Status::BufError {
212            return Err(Error::DecompressFile {
213                source: zlib::inflate::Error::Status(status),
214                path,
215            });
216        }
217
218        let decompressed_start = bytes_read;
219        let (kind, size, header_size) =
220            gix_object::decode::loose_header(&buf[decompressed_start..decompressed_start + consumed_out])?;
221
222        if status == zlib::Status::StreamEnd {
223            let decompressed_body_bytes_sans_header =
224                decompressed_start + header_size..decompressed_start + consumed_out;
225
226            if consumed_out as u64 != size + header_size as u64 {
227                return Err(Error::SizeMismatch {
228                    expected: size + header_size as u64,
229                    actual: consumed_out as u64,
230                    path,
231                });
232            }
233            buf.copy_within(decompressed_body_bytes_sans_header, 0);
234        } else {
235            let new_len = bytes_read as u64 + size + header_size as u64;
236            buf.resize(new_len.try_into().map_err(|_| Error::OutOfMemory { size: new_len })?, 0);
237            {
238                let (input, output) = buf.split_at_mut(bytes_read);
239                let num_decompressed_bytes = zlib::stream::inflate::read(
240                    &mut &input[consumed_in..],
241                    &mut inflate.state,
242                    &mut output[consumed_out..],
243                )
244                .map_err(|e| Error::Io {
245                    source: e,
246                    action: "deflate",
247                    path: path.to_owned(),
248                })?;
249                if num_decompressed_bytes as u64 + consumed_out as u64 != size + header_size as u64 {
250                    return Err(Error::SizeMismatch {
251                        expected: size + header_size as u64,
252                        actual: num_decompressed_bytes as u64 + consumed_out as u64,
253                        path,
254                    });
255                }
256            };
257            buf.copy_within(decompressed_start + header_size.., 0);
258        }
259        buf.resize(
260            size.try_into()
261                .expect("BUG: here the size is already confirmed to fit into memory"),
262            0,
263        );
264        Ok(gix_object::Data { kind, data: buf })
265    }
266}