gix_glob/search/pattern.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
use std::{
io::Read,
path::{Path, PathBuf},
};
use bstr::{BStr, BString, ByteSlice, ByteVec};
use crate::{pattern::Case, search::Pattern};
/// A list of patterns which optionally know where they were loaded from and what their base is.
///
/// If a base is known, which is relative to a source directory, all paths to match against
/// that don't also start with said base are ignored.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)]
pub struct List<T: Pattern> {
/// Patterns and their associated values in the order they were loaded in or specified.
/// Each [`Mapping`] also carries the line number in its source file, or its sequence number.
///
/// During matching, this order is reversed.
pub patterns: Vec<Mapping<T::Value>>,
/// The path from which the patterns were read, or `None` if the patterns
/// don't originate in a file on disk.
pub source: Option<PathBuf>,
/// The parent directory of `source`, or `None` if the patterns are _global_ and match against the repository root.
/// It's processed to contain forward slashes only and to end with a trailing slash, and it is relative to the
/// repository root.
pub base: Option<BString>,
}
/// An association of a pattern with its value, along with a sequence number providing a sort order
/// in relation to its peers.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
pub struct Mapping<T> {
/// The pattern itself, like `/target/*`.
pub pattern: crate::Pattern,
/// The value associated with the pattern.
pub value: T,
/// The position of this mapping among its peers, typically the line number in the file
/// the pattern was parsed from.
pub sequence_number: usize,
}
/// Read the entire content of the file at `path` into `buf`, which is cleared first.
///
/// If `follow_symlinks` is `false`, the file is opened with the options provided by
/// `gix_features::fs::open_options_no_follow()` instead of a plain `File::open()`.
///
/// Returns `Ok(true)` if the file was read in full, and `Ok(false)` if it does not exist
/// or if opening or reading it failed with an error that [`io_err_is_dir()`] recognizes.
/// All other I/O errors are passed on to the caller.
fn read_in_full_ignore_missing(path: &Path, follow_symlinks: bool, buf: &mut Vec<u8>) -> std::io::Result<bool> {
    buf.clear();
    let opened = if follow_symlinks {
        std::fs::File::open(path)
    } else {
        gix_features::fs::open_options_no_follow().read(true).open(path)
    };

    // Stage one: a missing path or a directory-related failure means "nothing to read", not an error.
    let mut handle = match opened {
        Ok(handle) => handle,
        Err(err) if err.kind() == std::io::ErrorKind::NotFound || io_err_is_dir(&err) => return Ok(false),
        Err(err) => return Err(err),
    };

    // Stage two: the open may have succeeded even though the read fails with a directory-related error.
    match handle.read_to_end(buf) {
        Ok(_) => Ok(true),
        Err(err) if io_err_is_dir(&err) => Ok(false),
        Err(err) => Err(err),
    }
}
/// Return `true` if the raw OS error code of `err` is one of the codes this module treats as
/// "the path involves a directory where a file was expected".
///
/// The codes checked are:
///
/// * `5` on Windows (`ERROR_ACCESS_DENIED`, presumably what opening a directory as a file reports there),
///   or `21` everywhere else (`EISDIR` on Unix-like systems).
/// * `20` on all platforms (`ENOTDIR` on Unix-like systems), which happens under different circumstances.
///
/// Errors without a raw OS error code never match.
fn io_err_is_dir(err: &std::io::Error) -> bool {
    // TODO: use the dedicated `std::io::ErrorKind` variants for this once stabilized
    let platform_dir_code = if cfg!(windows) { 5 } else { 21 };
    let not_a_directory_code = 20;
    matches!(
        err.raw_os_error(),
        Some(code) if code == platform_dir_code || code == not_a_directory_code
    )
}
/// Instantiation
impl<T> List<T>
where
    T: Pattern,
{
    /// Parse `bytes` as a list of patterns, one pattern per line, with `source_file` being the location
    /// these `bytes` were read from.
    ///
    /// If `root` is `Some(…)`, the parent directory of `source_file` is made relative to it to form the `base`
    /// of the list, which works if `source_file` is absolute. The `base` stays `None` if `root` is `None`,
    /// if `root` is not a prefix of that parent directory, or if nothing remains after stripping `root`.
    /// If `source_file` is relative and should be treated as base, set `root` to `Some("")`.
    ///
    /// # Panics
    ///
    /// If `root` is `Some(…)` and `source_file` has no parent directory.
    pub fn from_bytes(bytes: &[u8], source_file: PathBuf, root: Option<&Path>) -> Self {
        let patterns = T::bytes_to_patterns(bytes, source_file.as_path());
        let base = root
            .and_then(|root| source_file.parent().expect("file").strip_prefix(root).ok())
            .filter(|relative_dir| !relative_dir.as_os_str().is_empty())
            .map(|relative_dir| {
                let mut with_trailing_slash: BString =
                    gix_path::to_unix_separators_on_windows(gix_path::into_bstr(relative_dir)).into_owned();
                // The base always ends with a slash so it can be stripped off paths as a directory prefix.
                with_trailing_slash.push_byte(b'/');
                with_trailing_slash
            });
        List {
            patterns,
            source: Some(source_file),
            base,
        }
    }

    /// Create a pattern list from the `source` file, which may be located underneath `root`, while optionally
    /// following symlinks with `follow_symlinks`. `buf` is used to temporarily store the data contained in the file.
    ///
    /// Returns `Ok(None)` if `read_in_full_ignore_missing()` reports that there was nothing to read,
    /// and passes its I/O errors on to the caller.
    pub fn from_file(
        source: impl Into<PathBuf>,
        root: Option<&Path>,
        follow_symlinks: bool,
        buf: &mut Vec<u8>,
    ) -> std::io::Result<Option<Self>> {
        let source = source.into();
        if !read_in_full_ignore_missing(&source, follow_symlinks, buf)? {
            return Ok(None);
        }
        Ok(Some(Self::from_bytes(buf, source, root)))
    }
}
/// Utilities
impl<T> List<T>
where
    T: Pattern,
{
    /// Turn the repository-relative `relative_path` into a path relative to the base of this list,
    /// and adjust `basename_pos` accordingly if it was set. `case` is respected for the comparison.
    ///
    /// If this list has no base, `relative_path` and `basename_pos` are returned unchanged.
    /// Otherwise this delegates to the free function of the same name, which yields `None` if
    /// `relative_path` does not start with the base.
    pub fn strip_base_handle_recompute_basename_pos<'a>(
        &self,
        relative_path: &'a BStr,
        basename_pos: Option<usize>,
        case: Case,
    ) -> Option<(&'a BStr, Option<usize>)> {
        if let Some(base) = self.base.as_deref() {
            strip_base_handle_recompute_basename_pos(base.as_bstr(), relative_path, basename_pos, case)
        } else {
            Some((relative_path, basename_pos))
        }
    }
}
/// Return `relative_path` as being relative to `base` along with an updated `basename_pos` if it was set.
/// `case` is respected for the comparison.
///
/// This is useful to turn repository-relative paths into paths relative to a particular search base.
///
/// * With `Case::Sensitive`, `base` must be an exact byte-wise prefix of `relative_path`.
/// * With `Case::Fold`, the prefix is compared while ignoring ASCII case.
///
/// Returns `None` if `relative_path` does not start with `base`.
///
/// The returned `basename_pos` is the input position reduced by the length of `base`. It is `None` if
/// the input was `None`, if the reduced position is `0`, or if the input position was smaller than the
/// length of `base` and thus cannot point into the stripped path.
pub fn strip_base_handle_recompute_basename_pos<'a>(
    base: &BStr,
    relative_path: &'a BStr,
    basename_pos: Option<usize>,
    case: Case,
) -> Option<(&'a BStr, Option<usize>)> {
    let stripped = match case {
        Case::Sensitive => relative_path.strip_prefix(base.as_bytes())?.as_bstr(),
        Case::Fold => {
            let candidate = relative_path.get(..base.len())?;
            if !candidate.eq_ignore_ascii_case(base) {
                return None;
            }
            &relative_path[base.len()..]
        }
    };
    // `base` is not required to end with a slash, so `basename_pos` may be smaller than its length.
    // A checked subtraction avoids the underflow, which would panic in debug builds and wrap otherwise.
    let basename_pos = basename_pos
        .and_then(|pos| pos.checked_sub(base.len()))
        .filter(|&pos| pos != 0);
    Some((stripped, basename_pos))
}