sanitize_filename/
lib.rs

1use std::sync::OnceLock;
2
3extern crate regex;
4use regex::{Regex, RegexBuilder};
5
// Process-wide caches for the compiled regular expressions used by this
// module. Each cell starts out empty and is populated on first use by the
// accessor function of the matching lower-case name (`illegal_re`,
// `control_re`, ...), so each pattern is compiled at most once.
static ILLEGAL_RE: OnceLock<Regex> = OnceLock::new();
static CONTROL_RE: OnceLock<Regex> = OnceLock::new();
static RESERVED_RE: OnceLock<Regex> = OnceLock::new();
static WINDOWS_RESERVED_RE: OnceLock<Regex> = OnceLock::new();
static WINDOWS_TRAILING_RE: OnceLock<Regex> = OnceLock::new();
11
12fn illegal_re() -> &'static Regex {
13    ILLEGAL_RE.get_or_init(|| Regex::new(r#"[/\?<>\\:\*\|":]"#).unwrap())
14}
15
16fn control_re() -> &'static Regex {
17    CONTROL_RE.get_or_init(|| Regex::new(r#"[\x00-\x1f\x80-\x9f]"#).unwrap())
18}
19
20fn reserved_re() -> &'static Regex {
21    RESERVED_RE.get_or_init(|| Regex::new(r#"^\.+$"#).unwrap())
22}
23
24fn windows_reserved_re() -> &'static Regex {
25    WINDOWS_RESERVED_RE.get_or_init(|| {
26        RegexBuilder::new(r#"(?i)^(con|prn|aux|nul|com[0-9]|lpt[0-9])(\..*)?$"#)
27            .case_insensitive(true)
28            .build()
29            .unwrap()
30    })
31}
32
33fn windows_trailing_re() -> &'static Regex {
34    WINDOWS_TRAILING_RE.get_or_init(|| Regex::new(r#"[\. ]+$"#).unwrap())
35}
36
/// Settings accepted by `sanitize_with_options`.
#[derive(Clone, Debug)]
pub struct Options<'a> {
    /// When `true`, additionally apply the Windows-specific rules: reserved
    /// device names and trailing dots/spaces are replaced.
    pub windows: bool,
    /// When `true`, cut the result down to at most 255 bytes.
    pub truncate: bool,
    /// Text substituted for every match that is removed from the name.
    pub replacement: &'a str,
}
43
44impl<'a> Default for Options<'a> {
45    fn default() -> Self {
46        Options {
47            windows: cfg!(windows),
48            truncate: true,
49            replacement: "",
50        }
51    }
52}
53
54pub fn sanitize<S: AsRef<str>>(name: S) -> String {
55    sanitize_with_options(name, Options::default())
56}
57
58pub fn sanitize_with_options<S: AsRef<str>>(name: S, options: Options) -> String {
59    let Options {
60        windows,
61        truncate,
62        replacement,
63    } = options;
64    let name = name.as_ref();
65
66    let name = illegal_re().replace_all(&name, replacement);
67    let name = control_re().replace_all(&name, replacement);
68    let name = reserved_re().replace(&name, replacement);
69
70    let collect = |name: ::std::borrow::Cow<str>| {
71        if truncate && name.len() > 255 {
72            let mut end = 255;
73            loop {
74                if name.is_char_boundary(end) {
75                    break;
76                }
77                end -= 1;
78            }
79            String::from(&name[..end])
80        } else {
81            String::from(name)
82        }
83    };
84
85    if windows {
86        let name = windows_reserved_re().replace(&name, replacement);
87        let name = windows_trailing_re().replace(&name, replacement);
88        collect(name)
89    } else {
90        collect(name)
91    }
92}
93
/// Settings accepted by `is_sanitized_with_options`.
#[derive(Clone, Debug)]
pub struct OptionsForCheck {
    /// When `true`, additionally reject reserved Windows device names and
    /// names ending in a dot or space.
    pub windows: bool,
    /// When `true`, reject names longer than 255 bytes.
    pub truncate: bool,
}
99
100impl Default for OptionsForCheck {
101    fn default() -> Self {
102        OptionsForCheck {
103            windows: cfg!(windows),
104            truncate: true,
105        }
106    }
107}
108
109pub fn is_sanitized<S: AsRef<str>>(name: S) -> bool {
110    is_sanitized_with_options(name, OptionsForCheck::default())
111}
112
113pub fn is_sanitized_with_options<S: AsRef<str>>(name: S, options: OptionsForCheck) -> bool {
114    let OptionsForCheck { windows, truncate } = options;
115    let name = name.as_ref();
116
117    if illegal_re().is_match(&name) {
118        return false;
119    }
120    if control_re().is_match(&name) {
121        return false;
122    }
123    if reserved_re().is_match(&name) {
124        return false;
125    }
126    if truncate && name.len() > 255 {
127        return false;
128    }
129    if windows {
130        if windows_reserved_re().is_match(&name) {
131            return false;
132        }
133        if windows_trailing_re().is_match(&name) {
134            return false;
135        }
136    }
137
138    return true;
139}
140
#[cfg(test)]
mod tests {
    use super::{is_sanitized_with_options, sanitize_with_options, Options, OptionsForCheck};

    // Inputs taken from
    // https://github.com/parshap/node-sanitize-filename/blob/master/test.js
    //
    // The three tables below are parallel: entry `i` of `NAMES_CLEANED` and
    // `NAMES_IS_SANITIZED` is the expectation for entry `i` of `NAMES`.
    static NAMES: &[&str] = &[
        "the quick brown fox jumped over the lazy dog",
        "résumé",
        "hello\u{0000}world",
        "hello\nworld",
        "semi;colon.js",
        ";leading-semi.js",
        "slash\\.js",
        "slash/.js",
        "col:on.js",
        "star*.js",
        "question?.js",
        "quote\".js",
        "singlequote'.js",
        "brack<e>ts.js",
        "p|pes.js",
        "plus+.js",
        "'five and six<seven'.js",
        " space at front",
        "space at end ",
        ".period",
        "period.",
        "relative/path/to/some/dir",
        "/abs/path/to/some/dir",
        "~/.\u{0000}notssh/authorized_keys",
        "",
        "h?w",
        "h/w",
        "h*w",
        ".",
        "..",
        "./",
        "../",
        "/..",
        "/../",
        "*.|.",
        "./",
        "./foobar",
        "../foobar",
        "../../foobar",
        "./././foobar",
        "|*.what",
        "LPT9.asdf",
        "foobar...",
    ];

    // Expected output of sanitizing each entry of `NAMES` with Windows rules,
    // truncation and an empty replacement.
    static NAMES_CLEANED: &[&str] = &[
        "the quick brown fox jumped over the lazy dog",
        "résumé",
        "helloworld",
        "helloworld",
        "semi;colon.js",
        ";leading-semi.js",
        "slash.js",
        "slash.js",
        "colon.js",
        "star.js",
        "question.js",
        "quote.js",
        "singlequote'.js",
        "brackets.js",
        "ppes.js",
        "plus+.js",
        "'five and sixseven'.js",
        " space at front",
        "space at end",
        ".period",
        "period",
        "relativepathtosomedir",
        "abspathtosomedir",
        "~.notsshauthorized_keys",
        "",
        "hw",
        "hw",
        "hw",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        ".foobar",
        "..foobar",
        "....foobar",
        "...foobar",
        ".what",
        "",
        "foobar",
    ];

    // Whether each entry of `NAMES` is already sanitized under Windows rules
    // with the length check enabled.
    static NAMES_IS_SANITIZED: &[bool] = &[
        true, true, false, false, true, true, false, false, false, false, false, false, true,
        false, false, true, false, true, false, true, false, false, false, false, true, false,
        false, false, false, false, false, false, false, false, false, false, false, false, false,
        false, false, false, false,
    ];

    #[test]
    fn it_works() {
        // The tables are consumed pairwise below, so they must line up.
        assert_eq!(NAMES.len(), NAMES_CLEANED.len());
        assert_eq!(NAMES.len(), NAMES_IS_SANITIZED.len());

        let long = "a".repeat(300);

        // sanitize
        let sanitize_options = Options {
            windows: true,
            truncate: true,
            replacement: "",
        };

        for (name, expected) in NAMES.iter().zip(NAMES_CLEANED.iter()) {
            let cleaned = sanitize_with_options(name, sanitize_options.clone());
            assert_eq!(cleaned, *expected);
        }

        let shorter = "a".repeat(255);
        assert_eq!(
            sanitize_with_options(&long, sanitize_options.clone()),
            shorter
        );

        // is_sanitized
        let check_options = OptionsForCheck {
            windows: true,
            truncate: true,
        };

        for (name, expected) in NAMES.iter().zip(NAMES_IS_SANITIZED.iter()) {
            let verdict = is_sanitized_with_options(name, check_options.clone());
            assert_eq!(verdict, *expected);
        }

        assert!(!is_sanitized_with_options(&long, check_options.clone()));
    }
}
283}