Enable optional mime sniffing, and integrate it with the file loader.

This commit is contained in:
Josh Matthews 2015-02-21 11:22:31 -05:00
parent 2d730f2ae9
commit a3201bc1ac
8 changed files with 102 additions and 152 deletions

View file

@ -4,6 +4,7 @@
use net_traits::{LoadData, Metadata};
use net_traits::ProgressMsg::Done;
use mime_classifier::MIMEClassifier;
use resource_task::start_sending;
use file_loader;
@ -13,8 +14,9 @@ use util::resource_files::resources_dir_path;
use std::borrow::IntoCow;
use std::fs::PathExt;
use std::sync::Arc;
pub fn factory(mut load_data: LoadData) {
pub fn factory(mut load_data: LoadData, classifier: Arc<MIMEClassifier>) {
match load_data.url.non_relative_scheme_data().unwrap() {
"blank" => {
let start_chan = load_data.consumer;
@ -42,5 +44,5 @@ pub fn factory(mut load_data: LoadData) {
return
}
};
file_loader::factory(load_data)
file_loader::factory(load_data, classifier)
}

View file

@ -4,22 +4,24 @@
use net_traits::{LoadData, Metadata};
use net_traits::ProgressMsg::{Payload, Done};
use mime_classifier::MIMEClassifier;
use resource_task::start_sending;
use rustc_serialize::base64::FromBase64;
use hyper::mime::Mime;
use std::sync::Arc;
use url::{percent_decode, SchemeData};
pub fn factory(load_data: LoadData) {
pub fn factory(load_data: LoadData, classifier: Arc<MIMEClassifier>) {
// NB: we don't spawn a new task.
// Hypothesis: data URLs are too small for parallel base64 etc. to be worth it.
// Should be tested at some point.
// Left in separate function to allow easy moving to a task, if desired.
load(load_data)
load(load_data, classifier)
}
fn load(load_data: LoadData) {
fn load(load_data: LoadData, _classifier: Arc<MIMEClassifier>) {
let start_chan = load_data.consumer;
let url = load_data.url;
assert!(&*url.scheme == "data");

View file

@ -4,52 +4,78 @@
use net_traits::{LoadData, Metadata, ProgressMsg};
use net_traits::ProgressMsg::{Payload, Done};
use resource_task::start_sending;
use mime_classifier::MIMEClassifier;
use resource_task::{start_sending, start_sending_sniffed};
use std::borrow::ToOwned;
use std::io;
use std::fs::File;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::mpsc::Sender;
use util::task::spawn_named;
static READ_SIZE: uint = 8192;
/// Outcome of a single successful `read_block` call.
enum ReadStatus {
/// A non-empty chunk of bytes was read; the vector holds exactly those bytes.
Partial(Vec<u8>),
/// The reader reported zero bytes read, i.e. there is no more input.
EOF,
}
/// Reads a single block of at most `READ_SIZE` bytes from `reader`.
///
/// Returns `ReadStatus::Partial` holding exactly the bytes that were read,
/// `ReadStatus::EOF` when the reader yields zero bytes, or the description
/// of the I/O error as a `String` when the read fails.
fn read_block(reader: &mut io::Read) -> Result<ReadStatus, String> {
    let mut chunk = vec![0; READ_SIZE];
    let bytes_read = match reader.read(chunk.as_mut_slice()) {
        Ok(count) => count,
        Err(e) => return Err(e.description().to_string()),
    };
    if bytes_read == 0 {
        return Ok(ReadStatus::EOF);
    }
    // Drop the unused tail so the payload contains only real data.
    chunk.truncate(bytes_read);
    Ok(ReadStatus::Partial(chunk))
}
fn read_all(reader: &mut io::Read, progress_chan: &Sender<ProgressMsg>)
-> Result<(), String> {
-> Result<(), String> {
loop {
let mut buf = vec![0; READ_SIZE];
match reader.read(buf.as_mut_slice()) {
Ok(0) => return Ok(()),
Ok(n) => {
buf.truncate(n);
progress_chan.send(Payload(buf)).unwrap();
},
Err(e) => return Err(e.description().to_string()),
match try!(read_block(reader)) {
ReadStatus::Partial(buf) => progress_chan.send(Payload(buf)).unwrap(),
ReadStatus::EOF => return Ok(()),
}
}
}
pub fn factory(load_data: LoadData) {
pub fn factory(load_data: LoadData, classifier: Arc<MIMEClassifier>) {
let url = load_data.url;
let start_chan = load_data.consumer;
assert!(&*url.scheme == "file");
let progress_chan = start_sending(start_chan, Metadata::default(url.clone()));
spawn_named("file_loader".to_owned(), move || {
let metadata = Metadata::default(url.clone());
let file_path: Result<PathBuf, ()> = url.to_file_path();
match file_path {
Ok(file_path) => {
match File::open(&file_path) {
Ok(ref mut reader) => {
let res = read_all(reader, &progress_chan);
let res = read_block(reader);
let (res, progress_chan) = match res {
Ok(ReadStatus::Partial(buf)) => {
let progress_chan = start_sending_sniffed(start_chan, metadata,
classifier, &buf);
progress_chan.send(Payload(buf)).unwrap();
(read_all(reader, &progress_chan), progress_chan)
}
Ok(ReadStatus::EOF) | Err(_) =>
(res.map(|_| ()), start_sending(start_chan, metadata)),
};
progress_chan.send(Done(res)).unwrap();
}
Err(e) => {
let progress_chan = start_sending(start_chan, metadata);
progress_chan.send(Done(Err(e.description().to_string()))).unwrap();
}
}
}
Err(_) => {
let progress_chan = start_sending(start_chan, metadata);
progress_chan.send(Done(Err(url.to_string()))).unwrap();
}
}

View file

@ -5,6 +5,7 @@
use net_traits::{ControlMsg, CookieSource, LoadData, LoadResponse, Metadata};
use net_traits::ProgressMsg;
use net_traits::ProgressMsg::{Payload, Done};
use mime_classifier::MIMEClassifier;
use resource_task::start_sending_opt;
use log;
@ -21,6 +22,7 @@ use hyper::status::{StatusCode, StatusClass};
use std::error::Error;
use openssl::ssl::{SslContext, SslVerifyMode};
use std::io::{self, Read, Write};
use std::sync::Arc;
use std::sync::mpsc::{Sender, channel};
use std::thunk::Invoke;
use util::task::spawn_named;
@ -31,9 +33,9 @@ use url::{Url, UrlParser};
use std::borrow::ToOwned;
pub fn factory(cookies_chan: Sender<ControlMsg>)
-> Box<Invoke<(LoadData,)> + Send> {
box move |(load_data,)| {
spawn_named("http_loader".to_owned(), move || load(load_data, cookies_chan))
-> Box<Invoke<(LoadData, Arc<MIMEClassifier>)> + Send> {
box move |(load_data, classifier)| {
spawn_named("http_loader".to_owned(), move || load(load_data, classifier, cookies_chan))
}
}
@ -47,7 +49,7 @@ fn send_error(url: Url, err: String, start_chan: Sender<LoadResponse>) {
};
}
fn load(mut load_data: LoadData, cookies_chan: Sender<ControlMsg>) {
fn load(mut load_data: LoadData, classifier: Arc<MIMEClassifier>, cookies_chan: Sender<ControlMsg>) {
// FIXME: At the time of writing this FIXME, servo didn't have any central
// location for configuration. If you're reading this and such a
// repository DOES exist, please update this constant to use it.
@ -122,7 +124,7 @@ reason: \"certificate verify failed\" }]";
let mut image = resources_dir_path();
image.push("badcert.html");
let load_data = LoadData::new(Url::from_file_path(&*image).unwrap(), start_chan);
file_loader::factory(load_data);
file_loader::factory(load_data, classifier);
return;
},
Err(e) => {

View file

@ -315,13 +315,13 @@ impl MIMEChecker for BinaryOrPlaintextClassifier {
}
}
struct GroupedClassifier {
byte_matchers: Vec<Box<MIMEChecker + Send>>,
byte_matchers: Vec<Box<MIMEChecker + Send + Sync>>,
}
impl GroupedClassifier {
fn image_classifer() -> GroupedClassifier {
GroupedClassifier {
byte_matchers: vec![
box ByteMatcher::image_x_icon() as Box<MIMEChecker+Send>,
box ByteMatcher::image_x_icon(),
box ByteMatcher::image_x_icon_cursor(),
box ByteMatcher::image_bmp(),
box ByteMatcher::image_gif89a(),
@ -335,7 +335,7 @@ impl GroupedClassifier {
fn audio_video_classifer() -> GroupedClassifier {
GroupedClassifier{
byte_matchers: vec![
box ByteMatcher::video_webm() as Box<MIMEChecker+Send>,
box ByteMatcher::video_webm(),
box ByteMatcher::audio_basic(),
box ByteMatcher::audio_aiff(),
box ByteMatcher::audio_mpeg(),
@ -350,7 +350,7 @@ impl GroupedClassifier {
fn scriptable_classifier() -> GroupedClassifier {
GroupedClassifier{
byte_matchers: vec![
box ByteMatcher::text_html_doctype() as Box<MIMEChecker+Send>,
box ByteMatcher::text_html_doctype(),
box ByteMatcher::text_html_page(),
box ByteMatcher::text_html_head(),
box ByteMatcher::text_html_script(),
@ -376,7 +376,7 @@ impl GroupedClassifier {
fn plaintext_classifier() -> GroupedClassifier {
GroupedClassifier{
byte_matchers: vec![
box ByteMatcher::text_plain_utf_8_bom() as Box<MIMEChecker+Send>,
box ByteMatcher::text_plain_utf_8_bom(),
box ByteMatcher::text_plain_utf_16le_bom(),
box ByteMatcher::text_plain_utf_16be_bom(),
box ByteMatcher::application_postscript()
@ -386,7 +386,7 @@ impl GroupedClassifier {
fn archive_classifier() -> GroupedClassifier {
GroupedClassifier {
byte_matchers: vec![
box ByteMatcher::application_x_gzip() as Box<MIMEChecker+Send>,
box ByteMatcher::application_x_gzip(),
box ByteMatcher::application_zip(),
box ByteMatcher::application_x_rar_compressed()
]
@ -398,7 +398,7 @@ impl GroupedClassifier {
fn font_classifier() -> GroupedClassifier {
GroupedClassifier {
byte_matchers: vec![
box ByteMatcher::application_font_woff() as Box<MIMEChecker+Send>,
box ByteMatcher::application_font_woff(),
box ByteMatcher::true_type_collection(),
box ByteMatcher::open_type(),
box ByteMatcher::true_type(),

View file

@ -10,10 +10,12 @@ use file_loader;
use http_loader;
use cookie_storage::CookieStorage;
use cookie;
use mime_classifier::MIMEClassifier;
use net_traits::{ControlMsg, LoadData, LoadResponse};
use net_traits::{Metadata, ProgressMsg, ResourceTask};
use net_traits::ProgressMsg::Done;
use util::opts;
use util::task::spawn_named;
use hyper::header::UserAgent;
@ -27,6 +29,7 @@ use std::collections::HashMap;
use std::env;
use std::fs::File;
use std::io::{BufReader, Read};
use std::sync::Arc;
use std::sync::mpsc::{channel, Receiver, Sender};
use std::thunk::Invoke;
@ -61,6 +64,30 @@ pub fn start_sending(start_chan: Sender<LoadResponse>, metadata: Metadata) -> Se
start_sending_opt(start_chan, metadata).ok().unwrap()
}
/// For use by loaders in responding to a Load message that allows content sniffing.
///
/// Thin wrapper over `start_sending_sniffed_opt` for callers that do not handle
/// failure: an `Err` result causes a panic.
pub fn start_sending_sniffed(start_chan: Sender<LoadResponse>, metadata: Metadata,
                             classifier: Arc<MIMEClassifier>, partial_body: &Vec<u8>)
                             -> Sender<ProgressMsg> {
    let outcome = start_sending_sniffed_opt(start_chan, metadata, classifier, partial_body);
    outcome.ok().unwrap()
}
/// For use by loaders in responding to a Load message that allows content sniffing.
///
/// When the `sniff_mime_types` option is enabled, `metadata.content_type` is replaced
/// by the classifier's result for `partial_body` (given the current content type)
/// before the response is started. Otherwise the metadata is passed on unchanged.
/// The return value is whatever `start_sending_opt` returns.
pub fn start_sending_sniffed_opt(start_chan: Sender<LoadResponse>, mut metadata: Metadata,
                                 classifier: Arc<MIMEClassifier>, partial_body: &Vec<u8>)
                                 -> Result<Sender<ProgressMsg>, ()> {
    let sniffing_enabled = opts::get().sniff_mime_types;
    if sniffing_enabled {
        // TODO: should be calculated in the resource loader, from pull request #4094
        let (nosniff, check_for_apache_bug) = (false, false);
        let sniffed_type = classifier.classify(nosniff, check_for_apache_bug,
                                               &metadata.content_type, &partial_body);
        metadata.content_type = sniffed_type;
    }
    start_sending_opt(start_chan, metadata)
}
/// For use by loaders in responding to a Load message.
pub fn start_sending_opt(start_chan: Sender<LoadResponse>, metadata: Metadata) -> Result<Sender<ProgressMsg>, ()> {
let (progress_chan, progress_port) = channel();
@ -123,6 +150,7 @@ struct ResourceManager {
user_agent: Option<String>,
cookie_storage: CookieStorage,
resource_task: Sender<ControlMsg>,
mime_classifier: Arc<MIMEClassifier>,
}
impl ResourceManager {
@ -133,6 +161,7 @@ impl ResourceManager {
user_agent: user_agent,
cookie_storage: CookieStorage::new(),
resource_task: resource_task,
mime_classifier: Arc::new(MIMEClassifier::new()),
}
}
}
@ -174,10 +203,10 @@ impl ResourceManager {
self.user_agent.as_ref().map(|ua| load_data.headers.set(UserAgent(ua.clone())));
fn from_factory(factory: fn(LoadData,))
-> Box<Invoke<(LoadData,)> + Send> {
box move |(load_data,)| {
factory(load_data)
fn from_factory(factory: fn(LoadData, Arc<MIMEClassifier>))
-> Box<Invoke<(LoadData, Arc<MIMEClassifier>)> + Send> {
box move |(load_data, classifier)| {
factory(load_data, classifier)
}
}
@ -195,7 +224,7 @@ impl ResourceManager {
};
debug!("resource_task: loading url: {}", load_data.url.serialize());
loader.invoke((load_data,));
loader.invoke((load_data, self.mime_classifier.clone()));
}
}

View file

@ -1,117 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
//! A task that sniffs data
use std::sync::mpsc::{channel, Receiver, Sender};
use std::thread::Builder;
use mime_classifier::MIMEClassifier;
use resource_task::{LoadResponse, LoadResponse, ProgressMsg};
pub type SnifferTask = Sender<TargetedLoadResponse>;
/// Spawns a thread named "SnifferManager" that runs a `SnifferManager` on the
/// receiving end of a new channel, and returns the sending end.
///
/// Panics if the thread cannot be spawned.
pub fn new_sniffer_task() -> SnifferTask {
    let (sender, receiver) = channel();
    Builder::new()
        .name("SnifferManager".to_string())
        .spawn(move || {
            SnifferManager::new(receiver).start();
        })
        .unwrap();
    sender
}
/// State owned by the sniffing task.
struct SnifferManager {
/// Incoming responses whose bodies are to be sniffed.
data_receiver: Receiver<TargetedLoadResponse>,
/// Classifier used to compute a content type from a response body.
mime_classifier: MIMEClassifier
}
impl SnifferManager {
/// Creates a manager that reads from `data_receiver` and owns a freshly
/// constructed `MIMEClassifier`.
fn new(data_receiver: Receiver <TargetedLoadResponse>) -> SnifferManager {
SnifferManager {
data_receiver: data_receiver,
mime_classifier: MIMEClassifier::new()
}
}
}
impl SnifferManager {
    /// Processes incoming responses until the `data_receiver` channel is closed.
    ///
    /// For each response the whole body is buffered until `Done` arrives. If the load
    /// finished successfully, the content type in the metadata is replaced by the
    /// classifier's result for the buffered body. The response is then forwarded to the
    /// consumer with a fresh progress channel carrying the buffered body (if non-empty)
    /// followed by the original `Done` result.
    ///
    /// # Panics
    ///
    /// Panics if a response's progress channel is closed before `Done` is received, or
    /// if the consumer accepts the response but the new progress port is gone before
    /// the body and `Done` message have been sent.
    fn start(self) {
        for mut snif_data in self.data_receiver.iter() {
            // Read all the data
            let mut resource_data = vec!();
            loop {
                match snif_data.load_response.progress_port.recv().unwrap() {
                    ProgressMsg::Payload(data) => {
                        resource_data.push_all(data.as_slice());
                    }
                    ProgressMsg::Done(res) => {
                        let (new_progress_chan, new_progress_port) = channel();

                        // TODO: should be calculated in the resource loader, from pull request #4094
                        let nosniff = false;
                        let check_for_apache_bug = false;

                        // We have all the data, go ahead and sniff it and replace the Content-Type
                        if res.is_ok() {
                            snif_data.load_response.metadata.content_type = self.mime_classifier.classify(
                                nosniff, check_for_apache_bug, &snif_data.load_response.metadata.content_type,
                                &resource_data
                            );
                        }

                        let load_response = LoadResponse {
                            progress_port: new_progress_port,
                            metadata: snif_data.load_response.metadata,
                        };

                        // The consumer went away; give up on this response only.
                        if snif_data.consumer.send(load_response).is_err() {
                            break;
                        }
                        if resource_data.len() > 0 {
                            new_progress_chan.send(ProgressMsg::Payload(resource_data)).unwrap();
                        }
                        new_progress_chan.send(ProgressMsg::Done(res)).unwrap();
                        // This response is complete: leave the inner loop so the outer
                        // `for` picks up the next one. Returning here would shut the
                        // task down after the very first response.
                        break;
                    }
                }
            }
        } // end for
    }
}
/// Test-only counterpart of `new_sniffer_task`: spawns a thread named
/// "SnifferManager" that runs a `MockSnifferManager` on the receiving end of a new
/// channel, and returns the sending end.
///
/// Panics if the thread cannot be spawned.
#[cfg(test)]
pub fn new_mock_sniffer_task() -> SnifferTask {
    let (sen, rec) = channel();
    // Use the same `std::thread::Builder` API as `new_sniffer_task`; it is the only
    // thread builder this file imports.
    let builder = Builder::new().name("SnifferManager".to_string());
    builder.spawn(move || {
        MockSnifferManager::new(rec).start();
    }).unwrap();
    sen
}
/// Test-only stand-in for the sniffing task state; it holds no classifier.
#[cfg(test)]
struct MockSnifferManager {
/// Incoming responses to be passed on to their consumers.
data_receiver: Receiver<TargetedLoadResponse>,
}
#[cfg(test)]
impl MockSnifferManager {
/// Creates a mock manager that reads from `data_receiver`.
fn new(data_receiver: Receiver <TargetedLoadResponse>) -> MockSnifferManager {
MockSnifferManager {
data_receiver: data_receiver,
}
}
}
#[cfg(test)]
impl MockSnifferManager {
    /// Forwards every received response to its consumer unchanged, stopping once
    /// the `data_receiver` channel reports an error.
    fn start(self) {
        while let Ok(snif_data) = self.data_receiver.recv() {
            // A failed send (consumer gone) is deliberately ignored.
            let _ = snif_data.consumer.send(snif_data.load_response);
        }
    }
}

View file

@ -128,6 +128,9 @@ pub struct Opts {
/// A specific path to find required resources (such as user-agent.css).
pub resources_path: Option<String>,
/// Whether MIME sniffing should be used
pub sniff_mime_types: bool,
}
fn print_usage(app: &str, opts: &[getopts::OptGroup]) {
@ -207,6 +210,7 @@ pub fn default_opts() -> Opts {
validate_display_list_geometry: false,
profile_tasks: false,
resources_path: None,
sniff_mime_types: false,
}
}
@ -238,6 +242,7 @@ pub fn from_cmdline_args(args: &[String]) -> bool {
getopts::optflag("h", "help", "Print this message"),
getopts::optopt("r", "render-api", "Set the rendering API to use", "gl|mesa"),
getopts::optopt("", "resources-path", "Path to find static resources", "/home/servo/resources"),
getopts::optflag("", "sniff-mime-types" , "Enable MIME sniffing"),
);
let opt_match = match getopts::getopts(args, opts.as_slice()) {
@ -360,6 +365,7 @@ pub fn from_cmdline_args(args: &[String]) -> bool {
relayout_event: debug_options.contains(&"relayout-event"),
validate_display_list_geometry: debug_options.contains(&"validate-display-list-geometry"),
resources_path: opt_match.opt_str("resources-path"),
sniff_mime_types: opt_match.opt_present("sniff-mime-types"),
};
set_opts(opts);