radicle_surf/
fs.rs

1// This file is part of radicle-surf
2// <https://github.com/radicle-dev/radicle-surf>
3//
4// Copyright (C) 2019-2020 The Radicle Team <dev@radicle.xyz>
5//
6// This program is free software: you can redistribute it and/or modify
7// it under the terms of the GNU General Public License version 3 or
8// later as published by the Free Software Foundation.
9//
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13// GNU General Public License for more details.
14//
15// You should have received a copy of the GNU General Public License
16// along with this program. If not, see <https://www.gnu.org/licenses/>.
17
18//! Definition for a file system consisting of `Directory` and `File`.
19//!
20//! A `Directory` is expected to be a non-empty tree of directories and files.
21//! See [`Directory`] for more information.
22
23use std::{
24    cmp::Ordering,
25    collections::BTreeMap,
26    convert::{Infallible, Into as _},
27    path::{Path, PathBuf},
28};
29
30use git2::Blob;
31use radicle_git_ext::{is_not_found_err, Oid};
32use radicle_std_ext::result::ResultExt as _;
33use url::Url;
34
35use crate::{Repository, Revision};
36
/// Error types returned by the file system operations of this module.
pub mod error {
    use std::path::PathBuf;

    use thiserror::Error;

    /// Errors that can occur when working with a directory and its entries.
    #[derive(Debug, Error, PartialEq)]
    pub enum Directory {
        /// An error reported by `git2`.
        #[error(transparent)]
        Git(#[from] git2::Error),
        /// An error that occurred while working with a file.
        #[error(transparent)]
        File(#[from] File),
        /// The given path is not valid.
        #[error("the path {0} is not valid")]
        InvalidPath(PathBuf),
        /// The entry at the given path does not have the expected type.
        /// The second field names the type that was expected.
        #[error("the entry at '{0}' must be of type {1}")]
        InvalidType(PathBuf, &'static str),
        /// The name of an entry was not valid UTF-8.
        #[error("the entry name was not valid UTF-8")]
        Utf8Error,
        /// Nothing was found at the given path.
        #[error("the path {0} not found")]
        PathNotFound(PathBuf),
        /// An error that occurred while working with a submodule.
        #[error(transparent)]
        Submodule(#[from] Submodule),
    }

    /// Errors that can occur when working with a file.
    #[derive(Debug, Error, PartialEq)]
    pub enum File {
        /// An error reported by `git2`.
        #[error(transparent)]
        Git(#[from] git2::Error),
    }

    /// Errors that can occur when constructing a submodule.
    #[derive(Debug, Error, PartialEq)]
    pub enum Submodule {
        /// The URL of the submodule is not valid UTF-8.
        #[error("URL is invalid utf-8 for submodule '{name}': {err}")]
        Utf8 {
            /// The name of the submodule.
            name: String,
            /// The underlying UTF-8 decoding error.
            #[source]
            err: std::str::Utf8Error,
        },
        /// The URL of the submodule could not be parsed.
        #[error("failed to parse URL '{url}' for submodule '{name}': {err}")]
        ParseUrl {
            /// The name of the submodule.
            name: String,
            /// The URL string that failed to parse.
            url: String,
            /// The underlying URL parsing error.
            #[source]
            err: url::ParseError,
        },
    }
}
83
/// A `File` in a git repository.
///
/// The representation is lightweight: it holds only the file's name, its
/// location, and the [`Oid`] of the git blob which is this file.
///
/// The name of a file can be retrieved via [`File::name`].
///
/// The [`FileContent`] of a file can be retrieved via
/// [`File::content`].
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct File {
    /// The name of the file.
    name: String,
    /// The relative path of the file, not including the `name`,
    /// with respect to the root of the git repository.
    prefix: PathBuf,
    /// The object identifier of the git blob of this file.
    id: Oid,
}
103
104impl File {
105    /// Construct a new `File`.
106    ///
107    /// The `path` must be the prefix location of the directory, and
108    /// so should not end in `name`.
109    ///
110    /// The `id` must point to a git blob.
111    pub(crate) fn new(name: String, prefix: PathBuf, id: Oid) -> Self {
112        debug_assert!(
113            !prefix.ends_with(&name),
114            "prefix = {prefix:?}, name = {name}",
115        );
116        Self { name, prefix, id }
117    }
118
119    /// The name of this `File`.
120    pub fn name(&self) -> &str {
121        self.name.as_str()
122    }
123
124    /// The object identifier of this `File`.
125    pub fn id(&self) -> Oid {
126        self.id
127    }
128
129    /// Return the exact path for this `File`, including the `name` of
130    /// the directory itself.
131    ///
132    /// The path is relative to the git repository root.
133    pub fn path(&self) -> PathBuf {
134        self.prefix.join(escaped_name(&self.name))
135    }
136
137    /// Return the [`Path`] where this `File` is located, relative to the
138    /// git repository root.
139    pub fn location(&self) -> &Path {
140        &self.prefix
141    }
142
143    /// Get the [`FileContent`] for this `File`.
144    ///
145    /// # Errors
146    ///
147    /// This function will fail if it could not find the `git` blob
148    /// for the `Oid` of this `File`.
149    pub fn content<'a>(&self, repo: &'a Repository) -> Result<FileContent<'a>, error::File> {
150        let blob = repo.find_blob(self.id)?;
151        Ok(FileContent { blob })
152    }
153}
154
/// The contents of a [`File`].
///
/// A `FileContent` wraps the git blob of the file and borrows from the
/// repository the blob was looked up in.
///
/// To construct a `FileContent` use [`File::content`].
pub struct FileContent<'a> {
    /// The git blob holding the content of the file.
    blob: Blob<'a>,
}
161
162impl<'a> FileContent<'a> {
163    /// Return the file contents as a byte slice.
164    pub fn as_bytes(&self) -> &[u8] {
165        self.blob.content()
166    }
167
168    /// Return the size of the file contents.
169    pub fn size(&self) -> usize {
170        self.blob.size()
171    }
172
173    /// Creates a `FileContent` using a blob.
174    pub(crate) fn new(blob: Blob<'a>) -> Self {
175        Self { blob }
176    }
177}
178
/// A representation of a [`Directory`]'s entries.
///
/// The entries can be inspected by reference via [`Entries::names`],
/// [`Entries::entries`] and [`Entries::iter`], or consumed one by one
/// through the [`Iterator`] implementation.
pub struct Entries {
    /// The entries, keyed by their name.
    listing: BTreeMap<String, Entry>,
}
183
184impl Entries {
185    /// Return the name of each [`Entry`].
186    pub fn names(&self) -> impl Iterator<Item = &String> {
187        self.listing.keys()
188    }
189
190    /// Return each [`Entry`].
191    pub fn entries(&self) -> impl Iterator<Item = &Entry> {
192        self.listing.values()
193    }
194
195    /// Return each [`Entry`] and its name.
196    pub fn iter(&self) -> impl Iterator<Item = (&String, &Entry)> {
197        self.listing.iter()
198    }
199}
200
201impl Iterator for Entries {
202    type Item = Entry;
203
204    fn next(&mut self) -> Option<Self::Item> {
205        // Can be improved when `pop_first()` is stable for BTreeMap.
206        let next_key = match self.listing.keys().next() {
207            Some(k) => k.clone(),
208            None => return None,
209        };
210        self.listing.remove(&next_key)
211    }
212}
213
/// An `Entry` is either a [`File`] entry, a [`Directory`] entry, or a
/// [`Submodule`] entry.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Entry {
    /// A file entry within a [`Directory`].
    File(File),
    /// A sub-directory of a [`Directory`].
    Directory(Directory),
    /// An entry that points to a submodule.
    Submodule(Submodule),
}
224
225impl PartialOrd for Entry {
226    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
227        Some(self.cmp(other))
228    }
229}
230
231impl Ord for Entry {
232    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
233        match (self, other) {
234            (Entry::File(x), Entry::File(y)) => x.name().cmp(y.name()),
235            (Entry::File(_), Entry::Directory(_)) => Ordering::Less,
236            (Entry::File(_), Entry::Submodule(_)) => Ordering::Less,
237            (Entry::Directory(_), Entry::File(_)) => Ordering::Greater,
238            (Entry::Submodule(_), Entry::File(_)) => Ordering::Less,
239            (Entry::Directory(x), Entry::Directory(y)) => x.name().cmp(y.name()),
240            (Entry::Directory(x), Entry::Submodule(y)) => x.name().cmp(y.name()),
241            (Entry::Submodule(x), Entry::Directory(y)) => x.name().cmp(y.name()),
242            (Entry::Submodule(x), Entry::Submodule(y)) => x.name().cmp(y.name()),
243        }
244    }
245}
246
247impl Entry {
248    /// Get a label for the `Entriess`, either the name of the [`File`],
249    /// the name of the [`Directory`], or the name of the [`Submodule`].
250    pub fn name(&self) -> &String {
251        match self {
252            Entry::File(file) => &file.name,
253            Entry::Directory(directory) => directory.name(),
254            Entry::Submodule(submodule) => submodule.name(),
255        }
256    }
257
258    pub fn path(&self) -> PathBuf {
259        match self {
260            Entry::File(file) => file.path(),
261            Entry::Directory(directory) => directory.path(),
262            Entry::Submodule(submodule) => submodule.path(),
263        }
264    }
265
266    pub fn location(&self) -> &Path {
267        match self {
268            Entry::File(file) => file.location(),
269            Entry::Directory(directory) => directory.location(),
270            Entry::Submodule(submodule) => submodule.location(),
271        }
272    }
273
274    /// Returns `true` if the `Entry` is a file.
275    pub fn is_file(&self) -> bool {
276        matches!(self, Entry::File(_))
277    }
278
279    /// Returns `true` if the `Entry` is a directory.
280    pub fn is_directory(&self) -> bool {
281        matches!(self, Entry::Directory(_))
282    }
283
284    pub(crate) fn from_entry(
285        entry: &git2::TreeEntry,
286        path: PathBuf,
287        repo: &Repository,
288    ) -> Result<Self, error::Directory> {
289        let name = entry.name().ok_or(error::Directory::Utf8Error)?.to_string();
290        let id = entry.id().into();
291
292        match entry.kind() {
293            Some(git2::ObjectType::Tree) => Ok(Self::Directory(Directory::new(name, path, id))),
294            Some(git2::ObjectType::Blob) => Ok(Self::File(File::new(name, path, id))),
295            Some(git2::ObjectType::Commit) => {
296                let submodule = (!repo.is_bare())
297                    .then(|| repo.find_submodule(&name))
298                    .transpose()?;
299                Ok(Self::Submodule(Submodule::new(name, path, submodule, id)?))
300            }
301            _ => Err(error::Directory::InvalidType(path, "tree or blob")),
302        }
303    }
304}
305
/// A `Directory` is the representation of a file system directory, for a given
/// [`git` tree][git-tree].
///
/// The name of a directory can be retrieved via [`Directory::name`].
///
/// The [`Entries`] of a directory can be retrieved via
/// [`Directory::entries`].
///
/// [git-tree]: https://git-scm.com/book/en/v2/Git-Internals-Git-Objects
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Directory {
    /// The name of the directory.
    name: String,
    /// The relative path of the directory, not including the `name`,
    /// with respect to the root of the git repository.
    prefix: PathBuf,
    /// The object identifier of the git tree of this directory.
    id: Oid,
}
325
/// The (empty) name of the root directory, also used as the empty path
/// that designates a `Directory` itself in `Directory::find_directory`.
const ROOT_DIR: &str = "";
327
328impl Directory {
329    /// Creates a directory given its `tree_id`.
330    ///
331    /// The `name` and `prefix` are both set to be empty.
332    pub(crate) fn root(id: Oid) -> Self {
333        Self::new(ROOT_DIR.to_string(), PathBuf::new(), id)
334    }
335
336    /// Creates a directory given its `name` and `id`.
337    ///
338    /// The `path` must be the prefix location of the directory, and
339    /// so should not end in `name`.
340    ///
341    /// The `id` must point to a `git` tree.
342    pub(crate) fn new(name: String, prefix: PathBuf, id: Oid) -> Self {
343        debug_assert!(
344            name.is_empty() || !prefix.ends_with(&name),
345            "prefix = {prefix:?}, name = {name}",
346        );
347        Self { name, prefix, id }
348    }
349
350    /// Get the name of the current `Directory`.
351    pub fn name(&self) -> &String {
352        &self.name
353    }
354
355    /// The object identifier of this `[Directory]`.
356    pub fn id(&self) -> Oid {
357        self.id
358    }
359
360    /// Return the exact path for this `Directory`, including the `name` of the
361    /// directory itself.
362    ///
363    /// The path is relative to the git repository root.
364    pub fn path(&self) -> PathBuf {
365        self.prefix.join(escaped_name(&self.name))
366    }
367
368    /// Return the [`Path`] where this `Directory` is located, relative to the
369    /// git repository root.
370    pub fn location(&self) -> &Path {
371        &self.prefix
372    }
373
374    /// Return the [`Entries`] for this `Directory`'s `Oid`.
375    ///
376    /// The resulting `Entries` will only resolve to this
377    /// `Directory`'s entries. Any sub-directories will need to be
378    /// resolved independently.
379    ///
380    /// # Errors
381    ///
382    /// This function will fail if it could not find the `git` tree
383    /// for the `Oid`.
384    pub fn entries(&self, repo: &Repository) -> Result<Entries, error::Directory> {
385        let tree = repo.find_tree(self.id)?;
386
387        let mut entries = BTreeMap::new();
388        let mut error = None;
389        let path = self.path();
390
391        // Walks only the first level of entries. And `_entry_path` is always
392        // empty for the first level.
393        tree.walk(git2::TreeWalkMode::PreOrder, |_entry_path, entry| {
394            match Entry::from_entry(entry, path.clone(), repo) {
395                Ok(entry) => match entry {
396                    Entry::File(_) => {
397                        entries.insert(entry.name().clone(), entry);
398                        git2::TreeWalkResult::Ok
399                    }
400                    Entry::Directory(_) => {
401                        entries.insert(entry.name().clone(), entry);
402                        // Skip nested directories
403                        git2::TreeWalkResult::Skip
404                    }
405                    Entry::Submodule(_) => {
406                        entries.insert(entry.name().clone(), entry);
407                        git2::TreeWalkResult::Ok
408                    }
409                },
410                Err(err) => {
411                    error = Some(err);
412                    git2::TreeWalkResult::Abort
413                }
414            }
415        })?;
416
417        match error {
418            Some(err) => Err(err),
419            None => Ok(Entries { listing: entries }),
420        }
421    }
422
423    /// Find the [`Entry`] found at a non-empty `path`, if it exists.
424    pub fn find_entry<P>(&self, path: &P, repo: &Repository) -> Result<Entry, error::Directory>
425    where
426        P: AsRef<Path>,
427    {
428        // Search the path in git2 tree.
429        let path = path.as_ref();
430        let git2_tree = repo.find_tree(self.id)?;
431        let entry = git2_tree
432            .get_path(path)
433            .or_matches::<error::Directory, _, _>(is_not_found_err, || {
434                Err(error::Directory::PathNotFound(path.to_path_buf()))
435            })?;
436        let parent = path
437            .parent()
438            .ok_or_else(|| error::Directory::InvalidPath(path.to_path_buf()))?;
439        let root_path = self.path().join(parent);
440
441        Entry::from_entry(&entry, root_path, repo)
442    }
443
444    /// Find the `Oid`, for a [`File`], found at `path`, if it exists.
445    pub fn find_file<P>(&self, path: &P, repo: &Repository) -> Result<File, error::Directory>
446    where
447        P: AsRef<Path>,
448    {
449        match self.find_entry(path, repo)? {
450            Entry::File(file) => Ok(file),
451            _ => Err(error::Directory::InvalidType(
452                path.as_ref().to_path_buf(),
453                "file",
454            )),
455        }
456    }
457
458    /// Find the `Directory` found at `path`, if it exists.
459    ///
460    /// If `path` is `ROOT_DIR` (i.e. an empty path), returns self.
461    pub fn find_directory<P>(&self, path: &P, repo: &Repository) -> Result<Self, error::Directory>
462    where
463        P: AsRef<Path>,
464    {
465        if path.as_ref() == Path::new(ROOT_DIR) {
466            return Ok(self.clone());
467        }
468
469        match self.find_entry(path, repo)? {
470            Entry::Directory(d) => Ok(d),
471            _ => Err(error::Directory::InvalidType(
472                path.as_ref().to_path_buf(),
473                "directory",
474            )),
475        }
476    }
477
478    // TODO(fintan): This is going to be a bit trickier so going to leave it out for
479    // now
480    #[allow(dead_code)]
481    fn fuzzy_find(_label: &Path) -> Vec<Self> {
482        unimplemented!()
483    }
484
485    /// Get the total size, in bytes, of a `Directory`. The size is
486    /// the sum of all files that can be reached from this `Directory`.
487    pub fn size(&self, repo: &Repository) -> Result<usize, error::Directory> {
488        self.traverse(repo, 0, &mut |size, entry| match entry {
489            Entry::File(file) => Ok(size + file.content(repo)?.size()),
490            Entry::Directory(dir) => Ok(size + dir.size(repo)?),
491            Entry::Submodule(_) => Ok(size),
492        })
493    }
494
495    /// Traverse the entire `Directory` using the `initial`
496    /// accumulator and the function `f`.
497    ///
498    /// For each [`Entry::Directory`] this will recursively call
499    /// [`Directory::traverse`] and obtain its [`Entries`].
500    ///
501    /// `Error` is the error type of the fallible function.
502    /// `B` is the type of the accumulator.
503    /// `F` is the fallible function that takes the accumulator and
504    /// the next [`Entry`], possibly providing the next accumulator
505    /// value.
506    pub fn traverse<Error, B, F>(
507        &self,
508        repo: &Repository,
509        initial: B,
510        f: &mut F,
511    ) -> Result<B, Error>
512    where
513        Error: From<error::Directory>,
514        F: FnMut(B, &Entry) -> Result<B, Error>,
515    {
516        self.entries(repo)?
517            .entries()
518            .try_fold(initial, |acc, entry| match entry {
519                Entry::File(_) => f(acc, entry),
520                Entry::Directory(directory) => {
521                    let acc = directory.traverse(repo, acc, f)?;
522                    f(acc, entry)
523                }
524                Entry::Submodule(_) => f(acc, entry),
525            })
526    }
527}
528
529impl Revision for Directory {
530    type Error = Infallible;
531
532    fn object_id(&self, _repo: &Repository) -> Result<Oid, Self::Error> {
533        Ok(self.id)
534    }
535}
536
/// A representation of a Git [submodule] when encountered in a Git
/// repository.
///
/// [submodule]: https://git-scm.com/book/en/v2/Git-Tools-Submodules
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Submodule {
    /// The name of the submodule.
    name: String,
    /// The relative path of the submodule, not including the `name`,
    /// with respect to the root of the git repository.
    prefix: PathBuf,
    /// The commit pointer that Git provides when listing the submodule.
    id: Oid,
    /// The URL of the submodule, if it is defined.
    url: Option<Url>,
}
548
549impl Submodule {
550    /// Construct a new `Submodule`.
551    ///
552    /// The `path` must be the prefix location of the directory, and
553    /// so should not end in `name`.
554    ///
555    /// The `id` is the commit pointer that Git provides when listing
556    /// a submodule.
557    pub fn new(
558        name: String,
559        prefix: PathBuf,
560        submodule: Option<git2::Submodule>,
561        id: Oid,
562    ) -> Result<Self, error::Submodule> {
563        let url = submodule
564            .and_then(|module| {
565                module
566                    .opt_url_bytes()
567                    .map(|bs| std::str::from_utf8(bs).map(|url| url.to_string()))
568            })
569            .transpose()
570            .map_err(|err| error::Submodule::Utf8 {
571                name: name.clone(),
572                err,
573            })?;
574        let url = url
575            .map(|url| {
576                Url::parse(&url).map_err(|err| error::Submodule::ParseUrl {
577                    name: name.clone(),
578                    url,
579                    err,
580                })
581            })
582            .transpose()?;
583        Ok(Self {
584            name,
585            prefix,
586            id,
587            url,
588        })
589    }
590
591    /// The name of this `Submodule`.
592    pub fn name(&self) -> &String {
593        &self.name
594    }
595
596    /// Return the [`Path`] where this `Submodule` is located, relative to the
597    /// git repository root.
598    pub fn location(&self) -> &Path {
599        &self.prefix
600    }
601
602    /// Return the exact path for this `Submodule`, including the
603    /// `name` of the submodule itself.
604    ///
605    /// The path is relative to the git repository root.
606    pub fn path(&self) -> PathBuf {
607        self.prefix.join(escaped_name(&self.name))
608    }
609
610    /// The object identifier of this `Submodule`.
611    ///
612    /// Note that this does not exist in the parent `Repository`. A
613    /// new `Repository` should be opened for the submodule.
614    pub fn id(&self) -> Oid {
615        self.id
616    }
617
618    /// The URL for the submodule, if it is defined.
619    pub fn url(&self) -> &Option<Url> {
620        &self.url
621    }
622}
623
/// Escape every backslash in `name` by doubling it (`\` becomes `\\`), so
/// that the name can be processed correctly as part of a `PathBuf`.
///
/// All other characters are copied unchanged.
fn escaped_name(name: &str) -> String {
    let mut escaped = String::with_capacity(name.len());
    for ch in name.chars() {
        if ch == '\\' {
            escaped.push_str(r"\\");
        } else {
            escaped.push(ch);
        }
    }
    escaped
}