gix_filter/eol/utils.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
use crate::eol::{AttributesDigest, AutoCrlf, Configuration, Mode, Stats};
impl Default for Mode {
fn default() -> Self {
if cfg!(windows) {
Mode::CrLf
} else {
Mode::Lf
}
}
}
impl AttributesDigest {
/// Return the end-of-line mode this digest would require, or `None` if no conversion would be performed.
pub fn to_eol(&self, config: Configuration) -> Option<Mode> {
Some(match self {
AttributesDigest::Binary => return None,
AttributesDigest::TextInput | AttributesDigest::TextAutoInput => Mode::Lf,
AttributesDigest::TextCrlf | AttributesDigest::TextAutoCrlf => Mode::CrLf,
AttributesDigest::Text | AttributesDigest::TextAuto => config.to_eol(),
})
}
/// Return true if this digest allows for auto-determination of CRLF text conversion.
pub fn is_auto_text(&self) -> bool {
matches!(
self,
AttributesDigest::TextAuto | AttributesDigest::TextAutoCrlf | AttributesDigest::TextAutoInput
)
}
}
impl Configuration {
/// Return the line-ending mode that is configured here.
pub fn to_eol(&self) -> Mode {
match self.auto_crlf {
AutoCrlf::Enabled => Mode::CrLf,
AutoCrlf::Input => Mode::Lf,
AutoCrlf::Disabled => self.eol.unwrap_or_default(),
}
}
}
impl Stats {
/// Gather statistics from the given `bytes`.
///
/// Note that the entire buffer will be scanned.
pub fn from_bytes(bytes: &[u8]) -> Self {
let mut bytes = bytes.iter().peekable();
let mut null = 0;
let mut lone_cr = 0;
let mut lone_lf = 0;
let mut crlf = 0;
let mut printable = 0;
let mut non_printable = 0;
while let Some(b) = bytes.next() {
if *b == b'\r' {
match bytes.peek() {
Some(n) if **n == b'\n' => {
bytes.next();
crlf += 1;
}
_ => lone_cr += 1,
}
continue;
}
if *b == b'\n' {
lone_lf += 1;
continue;
}
if *b == 127 {
non_printable += 1;
} else if *b < 32 {
match *b {
8 /* \b */ | b'\t' | 27 /* \033 */ | 12 /* \014 */ => printable += 1,
0 => {
non_printable += 1;
null += 1;
},
_ => non_printable += 1,
}
} else {
printable += 1;
}
}
Self {
null,
lone_cr,
lone_lf,
crlf,
printable,
non_printable,
}
}
/// Returns `true` if these statistics are typical for a binary file.
pub fn is_binary(&self) -> bool {
self.lone_cr > 0 || self.null > 0 || (self.printable >> 7) < self.non_printable
}
/// Return `true` if we would convert the buffer from which these stats are derived, knowing only the digest
pub fn will_convert_lf_to_crlf(&self, digest: AttributesDigest, config: Configuration) -> bool {
if digest.to_eol(config) != Some(Mode::CrLf) {
return false;
}
// nothing to do?
if self.lone_lf == 0 {
return false;
}
if digest.is_auto_text() {
if self.is_binary() {
return false;
}
// Lone `\r` or mixed LF and CRLF isn't safe as it won't round-trip, and in auto-mode we don't touch it.
if self.lone_cr > 0 || self.crlf > 0 {
return false;
}
}
true
}
}