// mime_sniffer/api.rs
use url::Url;
use mime::Mime;

use magic::{is_unknown_mime_type, sniff_mime_type, sniff_mime_type_from_local_data};

/// Core trait for values whose content can be inspected to determine a MIME type.
pub trait MimeTypeSniffer {
    /// Sniffs the content and returns the detected MIME type as a string
    /// slice, or `None` when the implementation reports no type.
    fn sniff_mime_type(&self) -> Option<&str>;
}

/// Convenience layer on top of `MimeTypeSniffer` that yields a parsed `Mime`.
pub trait MimeTypeSnifferExt: MimeTypeSniffer {
    /// Sniffs the content and parses the resulting type string into a `Mime`.
    ///
    /// Returns `None` when nothing was sniffed, or when the sniffed string
    /// does not parse as a `Mime` (the parse error is discarded).
    fn sniff_mime_type_ext(&self) -> Option<Mime> {
        self.sniff_mime_type()
            .map(str::parse::<Mime>)
            .and_then(Result::ok)
    }
}

/// Every `MimeTypeSniffer` gets the `MimeTypeSnifferExt` methods for free.
impl<T> MimeTypeSnifferExt for T where T: MimeTypeSniffer {}

/// Policy trait: tells the caller whether content sniffing should be attempted.
pub trait MimeTypeSniffable {
    /// Returns `true` if the content should be sniffed for its MIME type.
    fn should_sniff_mime_type(&self) -> bool;
}

/// Any value viewable as a byte slice is sniffed from its bytes alone.
impl<T> MimeTypeSniffer for T
where
    T: AsRef<[u8]>,
{
    fn sniff_mime_type(&self) -> Option<&str> {
        let bytes: &[u8] = self.as_ref();
        sniff_mime_type_from_local_data(bytes)
    }
}

/// Values viewable as a byte slice are unconditionally sniffable.
impl<T> MimeTypeSniffable for T
where
    T: AsRef<[u8]>,
{
    fn should_sniff_mime_type(&self) -> bool {
        true
    }
}

/// HTTP request with content, URL and MIME type hint.
///
/// All three parts are borrowed for `'a`; the struct itself owns nothing.
pub struct HttpRequest<'a, T: 'a + AsRef<[u8]>, U: 'a + AsRef<str>> {
    /// Borrowed content; any type viewable as a byte slice.
    pub content: &'a T,
    /// Borrowed URL; any type viewable as a string slice.
    pub url: &'a U,
    /// MIME type hint as a string (e.g. `"text/plain"`).
    pub type_hint: &'a str,
}

/// Sniffs a request using its content together with its URL and type hint.
impl<'a, T, U> MimeTypeSniffer for HttpRequest<'a, T, U>
where
    T: 'a + AsRef<[u8]>,
    U: 'a + AsRef<str>,
{
    fn sniff_mime_type(&self) -> Option<&str> {
        let body: &[u8] = self.content.as_ref();
        let location: &str = self.url.as_ref();
        // The bare name resolves to the free function imported from `magic`,
        // not to this method.
        sniff_mime_type(body, location, self.type_hint)
    }
}

/// Declared MIME types that are still worth sniffing; compared verbatim
/// against a request's type hint.
const SNIFFABLE_TYPES: &[&str] = &[
    // Misconfigured web servers frequently label all sorts of content
    // as text/plain.
    "text/plain",
    // application/octet-stream is sniffed only to detect
    // application/x-chrome-extension, nothing else.
    "application/octet-stream",
    // XHTML and Atom/RSS feeds are commonly served as generic XML rather
    // than under their more specific MIME types.
    "text/xml",
    "application/xml",
    // Microsoft Office MIME types, which may be declared falsely.
    "application/msword",
    "application/vnd.ms-excel",
    "application/vnd.ms-powerpoint",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    "application/vnd.ms-excel.sheet.macroenabled.12",
    "application/vnd.ms-word.document.macroenabled.12",
    "application/vnd.ms-powerpoint.presentation.macroenabled.12",
    "application/mspowerpoint",
    "application/msexcel",
    "application/vnd.ms-word",
    "application/vnd.ms-word.document.12",
    "application/vnd.msword",
];

/// Sniffing policy for requests whose URL is a parsed `Url`.
impl<'a, T> MimeTypeSniffable for HttpRequest<'a, T, Url>
where
    T: 'a + AsRef<[u8]>,
{
    /// Sniff only when the URL scheme is on the allow-list AND the type hint
    /// is either listed in `SNIFFABLE_TYPES` or reported as unknown by
    /// `is_unknown_mime_type`.
    fn should_sniff_mime_type(&self) -> bool {
        // Schemes for which sniffing is permitted (the empty scheme included).
        const SNIFFABLE_SCHEMES: [&str; 6] = ["", "http", "https", "ftp", "content", "file"];

        let hint = self.type_hint;

        SNIFFABLE_SCHEMES.contains(&self.url.scheme())
            && (SNIFFABLE_TYPES.contains(&hint) || is_unknown_mime_type(hint))
    }
}

#[cfg(test)]
mod tests {
    use url::Url;

    use super::*;

    /// Raw bytes are sniffable directly through the `AsRef<[u8]>` impl.
    #[test]
    fn test_mime_type_sniffer() {
        let pdf_header = b"%PDF-1.5";

        assert_eq!(pdf_header.sniff_mime_type(), Some("application/pdf"));
    }

    /// A request declared as `text/plain` is eligible for sniffing and is
    /// resolved to the PowerPoint type, both as a string and as a `Mime`.
    #[test]
    fn test_request_sniffer() {
        let expected_type = "application/vnd.ms-powerpoint";
        let location = Url::parse("http://localhost/notes.ppt").unwrap();
        let payload = b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1";

        let req = HttpRequest {
            content: payload,
            url: &location,
            type_hint: "text/plain",
        };

        assert!(req.should_sniff_mime_type());
        assert_eq!(req.sniff_mime_type(), Some(expected_type));

        let expected_mime: Mime = expected_type.parse().unwrap();
        assert_eq!(req.sniff_mime_type_ext(), Some(expected_mime));
    }
}