Resolves #4183 - Implemementing context-based MIME type sniffing

The version of the standard is not finalized at the time of this writing.
Specifications may be found here: https://mimesniff.spec.whatwg.org/#context-specific-sniffing .
This commit is contained in:
David Rajchenbach-Teller 2015-09-30 12:22:00 +02:00 committed by Josh Matthews
parent 66c8aa8cda
commit 1e81b8c133
18 changed files with 226 additions and 93 deletions

View file

@ -29,7 +29,11 @@ pub fn factory(mut load_data: LoadData,
headers: None,
status: Some(RawStatus(200, "OK".into())),
};
if let Ok(chan) = start_sending_sniffed_opt(start_chan, metadata, classifier, &[]) {
if let Ok(chan) = start_sending_sniffed_opt(start_chan,
metadata,
classifier,
&[],
load_data.context) {
let _ = chan.send(Done(Ok(())));
}
return

View file

@ -88,7 +88,11 @@ pub fn load(load_data: LoadData,
let mut metadata = Metadata::default(url);
metadata.set_content_type(content_type.as_ref());
if let Ok(chan) = start_sending_sniffed_opt(start_chan, metadata, classifier, &bytes) {
if let Ok(chan) = start_sending_sniffed_opt(start_chan,
metadata,
classifier,
&bytes,
load_data.context) {
let _ = chan.send(Payload(bytes));
let _ = chan.send(Done(Ok(())));
}

View file

@ -61,6 +61,7 @@ pub fn factory(load_data: LoadData,
classifier: Arc<MIMEClassifier>,
cancel_listener: CancellationListener) {
let url = load_data.url;
let context = load_data.context;
assert!(&*url.scheme == "file");
spawn_named("file_loader".to_owned(), move || {
let file_path: Result<PathBuf, ()> = url.to_file_path();
@ -77,7 +78,7 @@ pub fn factory(load_data: LoadData,
let mime_type = guess_mime_type(file_path.as_path());
metadata.set_content_type(Some(&mime_type));
let progress_chan = start_sending_sniffed(senders, metadata,
classifier, &buf);
classifier, &buf, context);
progress_chan.send(Payload(buf)).unwrap();
let read_result = read_all(reader, &progress_chan, &cancel_listener);
if let Ok(load_result) = read_result {
@ -94,7 +95,8 @@ pub fn factory(load_data: LoadData,
if let Ok(chan) = start_sending_sniffed_opt(senders,
metadata,
classifier,
&[]) {
&[],
context) {
let _ = chan.send(Done(Ok(())));
}
}
@ -108,7 +110,7 @@ pub fn factory(load_data: LoadData,
// http://doc.rust-lang.org/std/fs/struct.OpenOptions.html#method.open
// but, we'll go for a "file not found!"
let url = Url::parse("about:not-found").unwrap();
let load_data_404 = LoadData::new(url, None);
let load_data_404 = LoadData::new(context, url, None);
about_loader::factory(load_data_404, senders, classifier, cancel_listener)
}
}

View file

@ -26,7 +26,7 @@ use mime_classifier::MIMEClassifier;
use msg::constellation_msg::{PipelineId};
use net_traits::ProgressMsg::{Done, Payload};
use net_traits::hosts::replace_hosts;
use net_traits::{CookieSource, IncludeSubdomains, LoadConsumer, LoadData, Metadata};
use net_traits::{CookieSource, IncludeSubdomains, LoadConsumer, LoadContext, LoadData, Metadata};
use openssl::ssl::error::{SslError, OpensslError};
use openssl::ssl::{SSL_OP_NO_SSLV2, SSL_OP_NO_SSLV3, SSL_VERIFY_PEER, SslContext, SslMethod};
use resource_task::{CancellationListener, send_error, start_sending_sniffed_opt};
@ -135,6 +135,7 @@ fn load_for_consumer(load_data: LoadData,
let factory = NetworkHttpRequestFactory {
connector: connector,
};
let context = load_data.context.clone();
match load::<WrappedHttpRequest>(load_data, hsts_list,
cookie_jar, devtools_chan,
&factory, user_agent,
@ -160,14 +161,14 @@ fn load_for_consumer(load_data: LoadData,
let mut image = resources_dir_path();
image.push("badcert.html");
let load_data = LoadData::new(Url::from_file_path(&*image).unwrap(), None);
let load_data = LoadData::new(context, Url::from_file_path(&*image).unwrap(), None);
file_loader::factory(load_data, start_chan, classifier, cancel_listener)
}
Err(LoadError::ConnectionAborted(_)) => unreachable!(),
Ok(mut load_response) => {
let metadata = load_response.metadata.clone();
send_data(&mut load_response, start_chan, metadata, classifier, &cancel_listener)
send_data(context, &mut load_response, start_chan, metadata, classifier, &cancel_listener)
}
}
}
@ -765,7 +766,8 @@ pub fn load<A>(load_data: LoadData,
}
}
fn send_data<R: Read>(reader: &mut R,
fn send_data<R: Read>(context: LoadContext,
reader: &mut R,
start_chan: LoadConsumer,
metadata: Metadata,
classifier: Arc<MIMEClassifier>,
@ -775,7 +777,7 @@ fn send_data<R: Read>(reader: &mut R,
Ok(ReadResult::Payload(buf)) => buf,
_ => vec!(),
};
let p = match start_sending_sniffed_opt(start_chan, metadata, classifier, &buf) {
let p = match start_sending_sniffed_opt(start_chan, metadata, classifier, &buf, context) {
Ok(p) => p,
_ => return
};

View file

@ -8,7 +8,8 @@ use net_traits::image::base::{Image, load_from_memory};
use net_traits::image_cache_task::ImageResponder;
use net_traits::image_cache_task::{ImageCacheChan, ImageCacheCommand, ImageCacheTask, ImageState};
use net_traits::image_cache_task::{ImageCacheResult, ImageResponse, UsePlaceholder};
use net_traits::{AsyncResponseTarget, ControlMsg, LoadConsumer, LoadData, ResourceTask, ResponseAction};
use net_traits::{AsyncResponseTarget, ControlMsg, LoadConsumer, LoadData, ResourceTask};
use net_traits::{ResponseAction, LoadContext};
use std::borrow::ToOwned;
use std::collections::HashMap;
use std::collections::hash_map::Entry::{Occupied, Vacant};
@ -423,7 +424,7 @@ impl ImageCache {
CacheResult::Miss => {
// A new load request! Request the load from
// the resource task.
let load_data = LoadData::new((*ref_url).clone(), None);
let load_data = LoadData::new(LoadContext::Image, (*ref_url).clone(), None);
let (action_sender, action_receiver) = ipc::channel().unwrap();
let response_target = AsyncResponseTarget {
sender: action_sender,

View file

@ -2,6 +2,7 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use net_traits::LoadContext;
use std::borrow::ToOwned;
pub struct MIMEClassifier {
@ -11,7 +12,8 @@ pub struct MIMEClassifier {
plaintext_classifier: GroupedClassifier,
archive_classifier: GroupedClassifier,
binary_or_plaintext: BinaryOrPlaintextClassifier,
feeds_classifier: FeedsClassifier
feeds_classifier: FeedsClassifier,
font_classifier: GroupedClassifier,
}
pub enum MediaType {
@ -33,35 +35,105 @@ pub enum NoSniffFlag {
}
impl MIMEClassifier {
//Performs MIME Type Sniffing Algorithm (section 7)
//Performs MIME Type Sniffing Algorithm (sections 7 and 8)
pub fn classify(&self,
context: LoadContext,
no_sniff_flag: NoSniffFlag,
apache_bug_flag: ApacheBugFlag,
supplied_type: &Option<(String, String)>,
data: &[u8]) -> (String, String) {
match *supplied_type {
None => self.sniff_unknown_type(no_sniff_flag, data),
Some(ref supplied_type) => {
let &(ref media_type, ref media_subtype) = supplied_type;
if MIMEClassifier::is_explicit_unknown(media_type, media_subtype) {
self.sniff_unknown_type(no_sniff_flag, data)
} else {
match no_sniff_flag {
NoSniffFlag::ON => supplied_type.clone(),
NoSniffFlag::OFF => match apache_bug_flag {
ApacheBugFlag::ON => self.sniff_text_or_data(data),
ApacheBugFlag::OFF => match MIMEClassifier::get_media_type(media_type,
media_subtype) {
Some(MediaType::Xml) => None,
Some(MediaType::Html) => self.feeds_classifier.classify(data),
Some(MediaType::Image) => self.image_classifier.classify(data),
Some(MediaType::AudioVideo) => self.audio_video_classifier.classify(data),
None => None
}.unwrap_or(supplied_type.clone())
let supplied_type_or_octet_stream = supplied_type.clone()
.unwrap_or(("application".to_owned(),
"octet-stream".to_owned()));
match context {
LoadContext::Browsing => match *supplied_type {
None => self.sniff_unknown_type(no_sniff_flag, data),
Some(ref supplied_type) => {
let &(ref media_type, ref media_subtype) = supplied_type;
if MIMEClassifier::is_explicit_unknown(media_type, media_subtype) {
self.sniff_unknown_type(no_sniff_flag, data)
} else {
match no_sniff_flag {
NoSniffFlag::ON => supplied_type.clone(),
NoSniffFlag::OFF => match apache_bug_flag {
ApacheBugFlag::ON => self.sniff_text_or_data(data),
ApacheBugFlag::OFF => match MIMEClassifier::get_media_type(media_type,
media_subtype) {
Some(MediaType::Html) => self.feeds_classifier.classify(data),
Some(MediaType::Image) => self.image_classifier.classify(data),
Some(MediaType::AudioVideo) => self.audio_video_classifier.classify(data),
Some(MediaType::Xml) | None => None,
}.unwrap_or(supplied_type.clone())
}
}
}
}
}
},
LoadContext::Image => {
// Section 8.2 Sniffing an image context
match MIMEClassifier::maybe_get_media_type(supplied_type) {
Some(MediaType::Xml) => None,
_ => self.image_classifier.classify(data),
}.unwrap_or(supplied_type_or_octet_stream)
},
LoadContext::AudioVideo => {
// Section 8.3 Sniffing an image context
match MIMEClassifier::maybe_get_media_type(supplied_type) {
Some(MediaType::Xml) => None,
_ => self.audio_video_classifier.classify(data),
}.unwrap_or(supplied_type_or_octet_stream)
},
LoadContext::Plugin => {
// 8.4 Sniffing in a plugin context
//
// This section was *not* finalized in the specs at the time
// of this implementation.
match *supplied_type {
None => ("application".to_owned(), "octet-stream".to_owned()),
_ => supplied_type_or_octet_stream,
}
},
LoadContext::Style => {
// 8.5 Sniffing in a style context
//
// This section was *not* finalized in the specs at the time
// of this implementation.
match *supplied_type {
None => ("text".to_owned(), "css".to_owned()),
_ => supplied_type_or_octet_stream,
}
},
LoadContext::Script => {
// 8.6 Sniffing in a script context
//
// This section was *not* finalized in the specs at the time
// of this implementation.
match *supplied_type {
None => ("text".to_owned(), "javascript".to_owned()),
_ => supplied_type_or_octet_stream,
}
},
LoadContext::Font => {
// 8.7 Sniffing in a font context
match MIMEClassifier::maybe_get_media_type(supplied_type) {
Some(MediaType::Xml) => None,
_ => self.font_classifier.classify(data),
}.unwrap_or(supplied_type_or_octet_stream)
},
LoadContext::TextTrack => {
// 8.8 Sniffing in a text track context
//
// This section was *not* finalized in the specs at the time
// of this implementation.
("text".to_owned(), "vtt".to_owned())
},
LoadContext::CacheManifest => {
// 8.9 Sniffing in a cache manifest context
//
// This section was *not* finalized in the specs at the time
// of this implementation.
("text".to_owned(), "cache-manifest".to_owned())
},
}
}
@ -73,7 +145,8 @@ impl MIMEClassifier {
plaintext_classifier: GroupedClassifier::plaintext_classifier(),
archive_classifier: GroupedClassifier::archive_classifier(),
binary_or_plaintext: BinaryOrPlaintextClassifier,
feeds_classifier: FeedsClassifier
feeds_classifier: FeedsClassifier,
font_classifier: GroupedClassifier::font_classifier()
}
}
@ -143,6 +216,12 @@ impl MIMEClassifier {
None
}
}
fn maybe_get_media_type(supplied_type: &Option<(String, String)>) -> Option<MediaType> {
supplied_type.as_ref().and_then(|&(ref media_type, ref media_subtype)| {
MIMEClassifier::get_media_type(media_type, media_subtype)
})
}
}
pub fn as_string_option(tup: Option<(&'static str, &'static str)>) -> Option<(String, String)> {
@ -375,8 +454,6 @@ impl GroupedClassifier {
}
}
// TODO: Use this in font context classifier
#[allow(dead_code)]
fn font_classifier() -> GroupedClassifier {
GroupedClassifier {
byte_matchers: vec![

View file

@ -17,6 +17,7 @@ use hyper::header::{ContentType, Header, SetCookie};
use hyper::mime::{Mime, SubLevel, TopLevel};
use ipc_channel::ipc::{self, IpcReceiver, IpcSender};
use mime_classifier::{ApacheBugFlag, MIMEClassifier, NoSniffFlag};
use net_traits::LoadContext;
use net_traits::ProgressMsg::Done;
use net_traits::{AsyncResponseTarget, Metadata, ProgressMsg, ResourceTask, ResponseAction};
use net_traits::{ControlMsg, CookieSource, LoadConsumer, LoadData, LoadResponse, ResourceId};
@ -65,14 +66,16 @@ pub fn send_error(url: Url, err: String, start_chan: LoadConsumer) {
/// For use by loaders in responding to a Load message that allows content sniffing.
pub fn start_sending_sniffed(start_chan: LoadConsumer, metadata: Metadata,
classifier: Arc<MIMEClassifier>, partial_body: &[u8])
classifier: Arc<MIMEClassifier>, partial_body: &[u8],
context: LoadContext)
-> ProgressSender {
start_sending_sniffed_opt(start_chan, metadata, classifier, partial_body).ok().unwrap()
start_sending_sniffed_opt(start_chan, metadata, classifier, partial_body, context).ok().unwrap()
}
/// For use by loaders in responding to a Load message that allows content sniffing.
pub fn start_sending_sniffed_opt(start_chan: LoadConsumer, mut metadata: Metadata,
classifier: Arc<MIMEClassifier>, partial_body: &[u8])
classifier: Arc<MIMEClassifier>, partial_body: &[u8],
context: LoadContext)
-> Result<ProgressSender, ()> {
if opts::get().sniff_mime_types {
// TODO: should be calculated in the resource loader, from pull requeset #4094
@ -94,10 +97,11 @@ pub fn start_sending_sniffed_opt(start_chan: LoadConsumer, mut metadata: Metadat
}
let supplied_type =
metadata.content_type.map(|ContentType(Mime(toplevel, sublevel, _))| {
metadata.content_type.as_ref().map(|&ContentType(Mime(ref toplevel, ref sublevel, _))| {
(format!("{}", toplevel), format!("{}", sublevel))
});
let (toplevel, sublevel) = classifier.classify(no_sniff,
let (toplevel, sublevel) = classifier.classify(context,
no_sniff,
check_for_apache_bug,
&supplied_type,
&partial_body);