Migrate mime_classifier top-level string to enum.

This commit is contained in:
Corey Farwell 2016-07-02 16:42:29 -04:00
parent 9d3fc76463
commit 66303e40b5
3 changed files with 89 additions and 84 deletions

View file

@ -2,6 +2,7 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use hyper::mime::TopLevel;
use net_traits::LoadContext;
use std::borrow::ToOwned;
@ -48,7 +49,8 @@ pub enum NoSniffFlag {
OFF
}
pub type MimeType = (String, String);
pub type MimeType = (TopLevel, String);
impl MimeClassifier {
//Performs MIME Type Sniffing Algorithm (sections 7 and 8)
@ -59,7 +61,7 @@ impl MimeClassifier {
supplied_type: &Option<MimeType>,
data: &[u8]) -> MimeType {
let supplied_type_or_octet_stream = supplied_type.clone()
.unwrap_or(("application".to_owned(),
.unwrap_or((TopLevel::Application,
"octet-stream".to_owned()));
match context {
LoadContext::Browsing => match *supplied_type {
@ -105,7 +107,7 @@ impl MimeClassifier {
// This section was *not* finalized in the specs at the time
// of this implementation.
match *supplied_type {
None => ("application".to_owned(), "octet-stream".to_owned()),
None => (TopLevel::Application, "octet-stream".to_owned()),
_ => supplied_type_or_octet_stream,
}
},
@ -115,7 +117,7 @@ impl MimeClassifier {
// This section was *not* finalized in the specs at the time
// of this implementation.
match *supplied_type {
None => ("text".to_owned(), "css".to_owned()),
None => (TopLevel::Text, "css".to_owned()),
_ => supplied_type_or_octet_stream,
}
},
@ -125,7 +127,7 @@ impl MimeClassifier {
// This section was *not* finalized in the specs at the time
// of this implementation.
match *supplied_type {
None => ("text".to_owned(), "javascript".to_owned()),
None => (TopLevel::Text, "javascript".to_owned()),
_ => supplied_type_or_octet_stream,
}
},
@ -141,14 +143,14 @@ impl MimeClassifier {
//
// This section was *not* finalized in the specs at the time
// of this implementation.
("text".to_owned(), "vtt".to_owned())
(TopLevel::Text, "vtt".to_owned())
},
LoadContext::CacheManifest => {
// 8.9 Sniffing in a cache manifest context
//
// This section was *not* finalized in the specs at the time
// of this implementation.
("text".to_owned(), "cache-manifest".to_owned())
(TopLevel::Text, "cache-manifest".to_owned())
},
}
}
@ -199,39 +201,41 @@ impl MimeClassifier {
self.binary_or_plaintext.classify(data).expect("BinaryOrPlaintextClassifier always succeeds")
}
fn is_xml(tp: &str, sub_tp: &str) -> bool {
fn is_xml(tp: &TopLevel, sub_tp: &str) -> bool {
sub_tp.ends_with("+xml") ||
match (tp, sub_tp) {
("application", "xml") | ("text", "xml") => true,
(&TopLevel::Application, "xml") | (&TopLevel::Text, "xml") => true,
_ => false
}
}
fn is_html(tp: &str, sub_tp: &str) -> bool {
tp == "text" && sub_tp == "html"
fn is_html(tp: &TopLevel, sub_tp: &str) -> bool {
*tp == TopLevel::Text && sub_tp == "html"
}
fn is_image(tp: &str) -> bool {
tp == "image"
fn is_image(tp: &TopLevel) -> bool {
*tp == TopLevel::Image
}
fn is_audio_video(tp: &str, sub_tp: &str) -> bool {
tp == "audio" ||
tp == "video" ||
(tp == "application" && sub_tp == "ogg")
fn is_audio_video(tp: &TopLevel, sub_tp: &str) -> bool {
*tp == TopLevel::Audio ||
*tp == TopLevel::Video ||
(*tp == TopLevel::Application && sub_tp == "ogg")
}
fn is_explicit_unknown(tp: &str, sub_tp: &str) -> bool {
match(tp, sub_tp) {
("unknown", "unknown") |
("application", "unknown") |
("*", "*") => true,
fn is_explicit_unknown(tp: &TopLevel, sub_tp: &str) -> bool {
if let TopLevel::Ext(ref e) = *tp {
return e == "unknown" && sub_tp == "unknown";
}
match (tp, sub_tp) {
(&TopLevel::Application, "unknown") |
(&TopLevel::Star, "*") => true,
_ => false
}
}
fn get_media_type(media_type: &str,
media_subtype: &str) -> Option<MediaType> {
fn get_media_type(media_type: &TopLevel,
media_subtype: &str) -> Option<MediaType> {
if MimeClassifier::is_xml(media_type, media_subtype) {
Some(MediaType::Xml)
} else if MimeClassifier::is_html(media_type, media_subtype) {
@ -252,7 +256,7 @@ impl MimeClassifier {
}
}
pub fn as_string_option(tup: Option<(&'static str, &'static str)>) -> Option<MimeType> {
pub fn as_string_option(tup: Option<(TopLevel, &'static str)>) -> Option<MimeType> {
tup.map(|(a, b)| (a.to_owned(), b.to_owned()))
}
@ -299,7 +303,7 @@ struct ByteMatcher {
pattern: &'static [u8],
mask: &'static [u8],
leading_ignore: &'static [u8],
content_type: (&'static str, &'static str)
content_type: (TopLevel, &'static str)
}
impl ByteMatcher {
@ -403,7 +407,7 @@ impl Mp4Matcher {
impl MIMEChecker for Mp4Matcher {
fn classify(&self, data: &[u8]) -> Option<MimeType> {
if self.matches(data) {
Some(("video".to_owned(), "mp4".to_owned()))
Some((TopLevel::Video, "mp4".to_owned()))
} else {
None
}
@ -417,19 +421,19 @@ impl MIMEChecker for Mp4Matcher {
struct BinaryOrPlaintextClassifier;
impl BinaryOrPlaintextClassifier {
fn classify_impl(&self, data: &[u8]) -> (&'static str, &'static str) {
fn classify_impl(&self, data: &[u8]) -> (TopLevel, &'static str) {
if data.starts_with(&[0xFFu8, 0xFEu8]) ||
data.starts_with(&[0xFEu8, 0xFFu8]) ||
data.starts_with(&[0xEFu8, 0xBBu8, 0xBFu8])
{
("text", "plain")
(TopLevel::Text, "plain")
} else if data.iter().any(|&x| x <= 0x08u8 ||
x == 0x0Bu8 ||
(x >= 0x0Eu8 && x <= 0x1Au8) ||
(x >= 0x1Cu8 && x <= 0x1Fu8)) {
("application", "octet-stream")
(TopLevel::Application, "octet-stream")
} else {
("text", "plain")
(TopLevel::Text, "plain")
}
}
}
@ -587,7 +591,7 @@ where T: Iterator<Item=&'a u8> + Clone {
struct FeedsClassifier;
impl FeedsClassifier {
// Implements sniffing for mislabeled feeds (https://mimesniff.spec.whatwg.org/#sniffing-a-mislabeled-feed)
fn classify_impl(&self, data: &[u8]) -> Option<(&'static str, &'static str)> {
fn classify_impl(&self, data: &[u8]) -> Option<(TopLevel, &'static str)> {
// Step 4: can not be feed unless length is > 3
if data.len() < 3 {
return None;
@ -618,11 +622,11 @@ impl FeedsClassifier {
// Step 5.2.5
if matcher.matches(b"rss") {
return Some(("application", "rss+xml"));
return Some((TopLevel::Application, "rss+xml"));
}
// Step 5.2.6
if matcher.matches(b"feed") {
return Some(("application", "atom+xml"));
return Some((TopLevel::Application, "atom+xml"));
}
// Step 5.2.7
if matcher.matches(b"rdf:RDF") {
@ -633,7 +637,7 @@ impl FeedsClassifier {
.chain(|| eats_until(&mut matcher,
b"http://www.w3.org/1999/02/22-rdf-syntax-ns#",
b"http://purl.org/rss/1.0/")) {
Match::StartAndEnd => return Some(("application", "rss+xml")),
Match::StartAndEnd => return Some((TopLevel::Application, "rss+xml")),
Match::DidNotMatch => {},
Match::Start => return None
}
@ -662,7 +666,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"\x00\x00\x01\x00",
mask: b"\xFF\xFF\xFF\xFF",
content_type: ("image", "x-icon"),
content_type: (TopLevel::Image, "x-icon"),
leading_ignore: &[]
}
}
@ -671,7 +675,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"\x00\x00\x02\x00",
mask: b"\xFF\xFF\xFF\xFF",
content_type: ("image", "x-icon"),
content_type: (TopLevel::Image, "x-icon"),
leading_ignore: &[]
}
}
@ -680,7 +684,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"BM",
mask: b"\xFF\xFF",
content_type: ("image", "bmp"),
content_type: (TopLevel::Image, "bmp"),
leading_ignore: &[]
}
}
@ -689,7 +693,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"GIF89a",
mask: b"\xFF\xFF\xFF\xFF\xFF\xFF",
content_type: ("image", "gif"),
content_type: (TopLevel::Image, "gif"),
leading_ignore: &[]
}
}
@ -698,7 +702,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"GIF87a",
mask: b"\xFF\xFF\xFF\xFF\xFF\xFF",
content_type: ("image", "gif"),
content_type: (TopLevel::Image, "gif"),
leading_ignore: &[]
}
}
@ -707,7 +711,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"RIFF\x00\x00\x00\x00WEBPVP",
mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF",
content_type: ("image", "webp"),
content_type: (TopLevel::Image, "webp"),
leading_ignore: &[]
}
}
@ -717,7 +721,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"\x89PNG\r\n\x1A\n",
mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
content_type: ("image", "png"),
content_type: (TopLevel::Image, "png"),
leading_ignore: &[]
}
}
@ -726,7 +730,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"\xFF\xD8\xFF",
mask: b"\xFF\xFF\xFF",
content_type: ("image", "jpeg"),
content_type: (TopLevel::Image, "jpeg"),
leading_ignore: &[]
}
}
@ -735,7 +739,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"\x1A\x45\xDF\xA3",
mask: b"\xFF\xFF\xFF\xFF",
content_type: ("video", "webm"),
content_type: (TopLevel::Video, "webm"),
leading_ignore: &[]
}
}
@ -744,7 +748,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b".snd",
mask: b"\xFF\xFF\xFF\xFF",
content_type: ("audio", "basic"),
content_type: (TopLevel::Audio, "basic"),
leading_ignore: &[]
}
}
@ -753,7 +757,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"FORM\x00\x00\x00\x00AIFF",
mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
content_type: ("audio", "aiff"),
content_type: (TopLevel::Audio, "aiff"),
leading_ignore: &[]
}
}
@ -762,7 +766,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"ID3",
mask: b"\xFF\xFF\xFF",
content_type: ("audio", "mpeg"),
content_type: (TopLevel::Audio, "mpeg"),
leading_ignore: &[]
}
}
@ -771,7 +775,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"OggS\x00",
mask: b"\xFF\xFF\xFF\xFF\xFF",
content_type: ("application", "ogg"),
content_type: (TopLevel::Application, "ogg"),
leading_ignore: &[]
}
}
@ -781,7 +785,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"MThd\x00\x00\x00\x06",
mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
content_type: ("audio", "midi"),
content_type: (TopLevel::Audio, "midi"),
leading_ignore: &[]
}
}
@ -790,7 +794,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"RIFF\x00\x00\x00\x00AVI ",
mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
content_type: ("video", "avi"),
content_type: (TopLevel::Video, "avi"),
leading_ignore: &[]
}
}
@ -799,7 +803,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"RIFF\x00\x00\x00\x00WAVE",
mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
content_type: ("audio", "wave"),
content_type: (TopLevel::Audio, "wave"),
leading_ignore: &[]
}
}
@ -809,7 +813,7 @@ impl ByteMatcher {
matcher: ByteMatcher {
pattern: b"<!DOCTYPE HTML",
mask: b"\xFF\xFF\xDF\xDF\xDF\xDF\xDF\xDF\xDF\xFF\xDF\xDF\xDF\xDF",
content_type: ("text", "html"),
content_type: (TopLevel::Text, "html"),
leading_ignore: b"\t\n\x0C\r "
}
}
@ -821,7 +825,7 @@ impl ByteMatcher {
matcher: ByteMatcher {
pattern: b"<HTML",
mask: b"\xFF\xDF\xDF\xDF\xDF",
content_type: ("text", "html"),
content_type: (TopLevel::Text, "html"),
leading_ignore: b"\t\n\x0C\r "
}
}
@ -833,7 +837,7 @@ impl ByteMatcher {
matcher: ByteMatcher {
pattern: b"<HEAD",
mask: b"\xFF\xDF\xDF\xDF\xDF",
content_type: ("text", "html"),
content_type: (TopLevel::Text, "html"),
leading_ignore: b"\t\n\x0C\r "
}
}
@ -845,7 +849,7 @@ impl ByteMatcher {
matcher: ByteMatcher {
pattern: b"<SCRIPT",
mask: b"\xFF\xDF\xDF\xDF\xDF\xDF\xDF",
content_type: ("text", "html"),
content_type: (TopLevel::Text, "html"),
leading_ignore: b"\t\n\x0C\r "
}
}
@ -857,7 +861,7 @@ impl ByteMatcher {
matcher: ByteMatcher {
pattern: b"<IFRAME",
mask: b"\xFF\xDF\xDF\xDF\xDF\xDF\xDF",
content_type: ("text", "html"),
content_type: (TopLevel::Text, "html"),
leading_ignore: b"\t\n\x0C\r "
}
}
@ -869,7 +873,7 @@ impl ByteMatcher {
matcher: ByteMatcher {
pattern: b"<H1",
mask: b"\xFF\xDF\xFF",
content_type: ("text", "html"),
content_type: (TopLevel::Text, "html"),
leading_ignore: b"\t\n\x0C\r "
}
}
@ -881,7 +885,7 @@ impl ByteMatcher {
matcher: ByteMatcher {
pattern: b"<DIV",
mask: b"\xFF\xDF\xDF\xDF",
content_type: ("text", "html"),
content_type: (TopLevel::Text, "html"),
leading_ignore: b"\t\n\x0C\r "
}
}
@ -893,7 +897,7 @@ impl ByteMatcher {
matcher: ByteMatcher {
pattern: b"<FONT",
mask: b"\xFF\xDF\xDF\xDF\xDF",
content_type: ("text", "html"),
content_type: (TopLevel::Text, "html"),
leading_ignore: b"\t\n\x0C\r "
}
}
@ -905,7 +909,7 @@ impl ByteMatcher {
matcher: ByteMatcher {
pattern: b"<TABLE",
mask: b"\xFF\xDF\xDF\xDF\xDF\xDF",
content_type: ("text", "html"),
content_type: (TopLevel::Text, "html"),
leading_ignore: b"\t\n\x0C\r "
}
}
@ -917,7 +921,7 @@ impl ByteMatcher {
matcher: ByteMatcher {
pattern: b"<A",
mask: b"\xFF\xDF",
content_type: ("text", "html"),
content_type: (TopLevel::Text, "html"),
leading_ignore: b"\t\n\x0C\r "
}
}
@ -929,7 +933,7 @@ impl ByteMatcher {
matcher: ByteMatcher {
pattern: b"<STYLE",
mask: b"\xFF\xDF\xDF\xDF\xDF\xDF",
content_type: ("text", "html"),
content_type: (TopLevel::Text, "html"),
leading_ignore: b"\t\n\x0C\r "
}
}
@ -941,7 +945,7 @@ impl ByteMatcher {
matcher: ByteMatcher {
pattern: b"<TITLE",
mask: b"\xFF\xDF\xDF\xDF\xDF\xDF",
content_type: ("text", "html"),
content_type: (TopLevel::Text, "html"),
leading_ignore: b"\t\n\x0C\r "
}
}
@ -953,7 +957,7 @@ impl ByteMatcher {
matcher: ByteMatcher {
pattern: b"<B",
mask: b"\xFF\xDF",
content_type: ("text", "html"),
content_type: (TopLevel::Text, "html"),
leading_ignore: b"\t\n\x0C\r "
}
}
@ -965,7 +969,7 @@ impl ByteMatcher {
matcher: ByteMatcher {
pattern: b"<BODY",
mask: b"\xFF\xDF\xDF\xDF\xDF",
content_type: ("text", "html"),
content_type: (TopLevel::Text, "html"),
leading_ignore: b"\t\n\x0C\r "
}
}
@ -977,7 +981,7 @@ impl ByteMatcher {
matcher: ByteMatcher {
pattern: b"<BR",
mask: b"\xFF\xDF\xDF",
content_type: ("text", "html"),
content_type: (TopLevel::Text, "html"),
leading_ignore: b"\t\n\x0C\r "
}
}
@ -989,7 +993,7 @@ impl ByteMatcher {
matcher: ByteMatcher {
pattern: b"<P",
mask: b"\xFF\xDF",
content_type: ("text", "html"),
content_type: (TopLevel::Text, "html"),
leading_ignore: b"\t\n\x0C\r "
}
}
@ -1001,7 +1005,7 @@ impl ByteMatcher {
matcher: ByteMatcher {
pattern: b"<!--",
mask: b"\xFF\xFF\xFF\xFF",
content_type: ("text", "html"),
content_type: (TopLevel::Text, "html"),
leading_ignore: b"\t\n\x0C\r "
}
}
@ -1012,7 +1016,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"<?xml",
mask: b"\xFF\xFF\xFF\xFF\xFF",
content_type: ("text", "xml"),
content_type: (TopLevel::Text, "xml"),
leading_ignore: b"\t\n\x0C\r "
}
}
@ -1021,7 +1025,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"%PDF-",
mask: b"\xFF\xFF\xFF\xFF\xFF",
content_type: ("application", "pdf"),
content_type: (TopLevel::Application, "pdf"),
leading_ignore: &[]
}
}
@ -1034,7 +1038,7 @@ impl ByteMatcher {
mask: b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\x00\x00\xFF\xFF",
content_type: ("application", "vnd.ms-fontobject"),
content_type: (TopLevel::Application, "vnd.ms-fontobject"),
leading_ignore: &[]
}
}
@ -1043,7 +1047,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"\x00\x01\x00\x00",
mask: b"\xFF\xFF\xFF\xFF",
content_type: ("application", "font-sfnt"),
content_type: (TopLevel::Application, "font-sfnt"),
leading_ignore: &[]
}
}
@ -1052,7 +1056,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"OTTO",
mask: b"\xFF\xFF\xFF\xFF",
content_type: ("application", "font-sfnt"),
content_type: (TopLevel::Application, "font-sfnt"),
leading_ignore: &[]
}
}
@ -1061,7 +1065,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"ttcf",
mask: b"\xFF\xFF\xFF\xFF",
content_type: ("application", "font-sfnt"),
content_type: (TopLevel::Application, "font-sfnt"),
leading_ignore: &[]
}
}
@ -1070,7 +1074,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"wOFF",
mask: b"\xFF\xFF\xFF\xFF",
content_type: ("application", "font-woff"),
content_type: (TopLevel::Application, "font-woff"),
leading_ignore: &[]
}
}
@ -1079,7 +1083,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"\x1F\x8B\x08",
mask: b"\xFF\xFF\xFF",
content_type: ("application", "x-gzip"),
content_type: (TopLevel::Application, "x-gzip"),
leading_ignore: &[]
}
}
@ -1088,7 +1092,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"PK\x03\x04",
mask: b"\xFF\xFF\xFF\xFF",
content_type: ("application", "zip"),
content_type: (TopLevel::Application, "zip"),
leading_ignore: &[]
}
}
@ -1097,7 +1101,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"Rar \x1A\x07\x00",
mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
content_type: ("application", "x-rar-compressed"),
content_type: (TopLevel::Application, "x-rar-compressed"),
leading_ignore: &[]
}
}
@ -1106,7 +1110,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"%!PS-Adobe-",
mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
content_type: ("application", "postscript"),
content_type: (TopLevel::Application, "postscript"),
leading_ignore: &[]
}
}
@ -1115,7 +1119,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"\xFE\xFF\x00\x00",
mask: b"\xFF\xFF\x00\x00",
content_type: ("text", "plain"),
content_type: (TopLevel::Text, "plain"),
leading_ignore: &[]
}
}
@ -1124,7 +1128,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"\xFF\xFE\x00\x00",
mask: b"\xFF\xFF\x00\x00",
content_type: ("text", "plain"),
content_type: (TopLevel::Text, "plain"),
leading_ignore: &[]
}
}
@ -1133,7 +1137,7 @@ impl ByteMatcher {
ByteMatcher {
pattern: b"\xEF\xBB\xBF\x00",
mask: b"\xFF\xFF\xFF\x00",
content_type: ("text", "plain"),
content_type: (TopLevel::Text, "plain"),
leading_ignore: &[]
}
}