mime_sniffer/
api.rs

1use mime::Mime;
2use url::Url;
3
4use crate::magic::{is_unknown_mime_type, sniff_mime_type, sniff_mime_type_from_local_data};
5
/// Content-based MIME type detection.
///
/// Implementors inspect their own data and report the MIME type it appears
/// to be.
pub trait MimeTypeSniffer {
    /// Returns the sniffed MIME type as a string, or `None` when the
    /// implementor reports no type.
    fn sniff_mime_type(&self) -> Option<&str>;
}
11
12/// Extension methods for MIME type sniffer
13pub trait MimeTypeSnifferExt: MimeTypeSniffer {
14    /// sniff content for MIME type
15    fn sniff_mime_type_ext(&self) -> Option<Mime> {
16        self.sniff_mime_type()
17            .and_then(|mime_type| mime_type.parse().ok())
18    }
19}
20
21impl<T: MimeTypeSniffer> MimeTypeSnifferExt for T {}
22
/// Decides whether content-based MIME type sniffing applies to a value.
pub trait MimeTypeSniffable {
    /// Returns `true` when the content should be sniffed for its MIME type.
    fn should_sniff_mime_type(&self) -> bool;
}
28
29impl<T: AsRef<[u8]>> MimeTypeSniffer for T {
30    fn sniff_mime_type(&self) -> Option<&str> {
31        sniff_mime_type_from_local_data(self.as_ref())
32    }
33}
34
35impl<T: AsRef<[u8]>> MimeTypeSniffable for T {
36    fn should_sniff_mime_type(&self) -> bool {
37        true
38    }
39}
40
/// Borrowed view of an HTTP request: its content, its URL, and a MIME type hint.
pub struct HttpRequest<'a, T, U>
where
    T: 'a + AsRef<[u8]>,
    U: 'a + AsRef<str>,
{
    /// Content bytes to inspect.
    pub content: &'a T,
    /// URL associated with the content.
    pub url: &'a U,
    /// MIME type hint for the content.
    pub type_hint: &'a str,
}
47
48impl<'a, T: 'a + AsRef<[u8]>, U: 'a + AsRef<str>> MimeTypeSniffer for HttpRequest<'a, T, U> {
49    fn sniff_mime_type(&self) -> Option<&str> {
50        sniff_mime_type(self.content.as_ref(), self.url.as_ref(), self.type_hint)
51    }
52}
53
/// MIME type hints for which the content is sniffed anyway.
const SNIFFABLE_TYPES: &[&str] = &[
    // Misconfigured web servers frequently label many different kinds of
    // content as text/plain.
    "text/plain",
    // application/octet-stream is sniffed solely to detect
    // application/x-chrome-extension, nothing else.
    "application/octet-stream",
    // Plain XML types are often used for XHTML and Atom/RSS feeds in place
    // of their more specific MIME types.
    "text/xml",
    "application/xml",
    // Microsoft Office MIME types, which may be reported falsely.
    "application/msword",
    "application/vnd.ms-excel",
    "application/vnd.ms-powerpoint",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    "application/vnd.ms-excel.sheet.macroenabled.12",
    "application/vnd.ms-word.document.macroenabled.12",
    "application/vnd.ms-powerpoint.presentation.macroenabled.12",
    "application/mspowerpoint",
    "application/msexcel",
    "application/vnd.ms-word",
    "application/vnd.ms-word.document.12",
    "application/vnd.msword",
];
81
82impl<'a, T: 'a + AsRef<[u8]>> MimeTypeSniffable for HttpRequest<'a, T, Url> {
83    fn should_sniff_mime_type(&self) -> bool {
84        match self.url.scheme() {
85            "" | "http" | "https" | "ftp" | "content" | "file" => {
86                SNIFFABLE_TYPES
87                    .iter()
88                    .any(|&mime_type| mime_type == self.type_hint)
89                    || is_unknown_mime_type(self.type_hint)
90            }
91            _ => false,
92        }
93    }
94}
95
#[cfg(test)]
mod tests {
    use super::*;

    /// Raw bytes are sniffed from their content alone.
    #[test]
    fn test_mime_type_sniffer() {
        let pdf_header = b"%PDF-1.5";
        assert_eq!(pdf_header.sniff_mime_type(), Some("application/pdf"));
    }

    /// A request with a `.ppt` URL and a `text/plain` hint is sniffable and
    /// resolves to the PowerPoint MIME type, both as a string and as a `Mime`.
    #[test]
    fn test_request_sniffer() {
        let location = Url::parse("http://localhost/notes.ppt").unwrap();
        let request = HttpRequest {
            content: b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",
            url: &location,
            type_hint: "text/plain",
        };
        let expected = "application/vnd.ms-powerpoint";

        assert!(request.should_sniff_mime_type());
        assert_eq!(request.sniff_mime_type(), Some(expected));
        assert_eq!(
            request.sniff_mime_type_ext(),
            Some(expected.parse::<Mime>().unwrap())
        );
    }
}
121}