diff --git a/components/net/about_loader.rs b/components/net/about_loader.rs index 140d4d7df59..3fd1a09c315 100644 --- a/components/net/about_loader.rs +++ b/components/net/about_loader.rs @@ -4,6 +4,7 @@ use net_traits::{LoadData, Metadata}; use net_traits::ProgressMsg::Done; +use mime_classifier::MIMEClassifier; use resource_task::start_sending; use file_loader; @@ -13,8 +14,9 @@ use util::resource_files::resources_dir_path; use std::borrow::IntoCow; use std::fs::PathExt; +use std::sync::Arc; -pub fn factory(mut load_data: LoadData) { +pub fn factory(mut load_data: LoadData, classifier: Arc) { match load_data.url.non_relative_scheme_data().unwrap() { "blank" => { let start_chan = load_data.consumer; @@ -42,5 +44,5 @@ pub fn factory(mut load_data: LoadData) { return } }; - file_loader::factory(load_data) + file_loader::factory(load_data, classifier) } diff --git a/components/net/data_loader.rs b/components/net/data_loader.rs index e052abc8d07..7bd0a3bf887 100644 --- a/components/net/data_loader.rs +++ b/components/net/data_loader.rs @@ -4,22 +4,24 @@ use net_traits::{LoadData, Metadata}; use net_traits::ProgressMsg::{Payload, Done}; +use mime_classifier::MIMEClassifier; use resource_task::start_sending; use rustc_serialize::base64::FromBase64; use hyper::mime::Mime; +use std::sync::Arc; use url::{percent_decode, SchemeData}; -pub fn factory(load_data: LoadData) { +pub fn factory(load_data: LoadData, classifier: Arc) { // NB: we don't spawn a new task. // Hypothesis: data URLs are too small for parallel base64 etc. to be worth it. // Should be tested at some point. // Left in separate function to allow easy moving to a task, if desired. - load(load_data) + load(load_data, classifier) } -fn load(load_data: LoadData) { +fn load(load_data: LoadData, _classifier: Arc) { let start_chan = load_data.consumer; let url = load_data.url; assert!(&*url.scheme == "data"); diff --git a/components/net/file_loader.rs b/components/net/file_loader.rs index 389afd48c0f..83fcc7ed59f 100644 --- a/components/net/file_loader.rs +++ b/components/net/file_loader.rs @@ -4,52 +4,78 @@ use net_traits::{LoadData, Metadata, ProgressMsg}; use net_traits::ProgressMsg::{Payload, Done}; -use resource_task::start_sending; +use mime_classifier::MIMEClassifier; +use resource_task::{start_sending, start_sending_sniffed}; use std::borrow::ToOwned; use std::io; use std::fs::File; use std::path::PathBuf; +use std::sync::Arc; use std::sync::mpsc::Sender; use util::task::spawn_named; static READ_SIZE: uint = 8192; +enum ReadStatus { + Partial(Vec), + EOF, +} + +fn read_block(reader: &mut io::Read) -> Result { + let mut buf = vec![0; READ_SIZE]; + match reader.read(buf.as_mut_slice()) { + Ok(0) => return Ok(ReadStatus::EOF), + Ok(n) => { + buf.truncate(n); + Ok(ReadStatus::Partial(buf)) + } + Err(e) => Err(e.description().to_string()), + } +} + fn read_all(reader: &mut io::Read, progress_chan: &Sender) - -> Result<(), String> { + -> Result<(), String> { loop { - let mut buf = vec![0; READ_SIZE]; - match reader.read(buf.as_mut_slice()) { - Ok(0) => return Ok(()), - Ok(n) => { - buf.truncate(n); - progress_chan.send(Payload(buf)).unwrap(); - }, - Err(e) => return Err(e.description().to_string()), + match try!(read_block(reader)) { + ReadStatus::Partial(buf) => progress_chan.send(Payload(buf)).unwrap(), + ReadStatus::EOF => return Ok(()), } } } -pub fn factory(load_data: LoadData) { +pub fn factory(load_data: LoadData, classifier: Arc) { let url = load_data.url; let start_chan = load_data.consumer; assert!(&*url.scheme == "file"); - let progress_chan = start_sending(start_chan, Metadata::default(url.clone())); spawn_named("file_loader".to_owned(), move || { + let metadata = Metadata::default(url.clone()); let file_path: Result = url.to_file_path(); match file_path { Ok(file_path) => { match File::open(&file_path) { Ok(ref mut reader) => { - let res = read_all(reader, &progress_chan); + let res = read_block(reader); + let (res, progress_chan) = match res { + Ok(ReadStatus::Partial(buf)) => { + let progress_chan = start_sending_sniffed(start_chan, metadata, + classifier, &buf); + progress_chan.send(Payload(buf)).unwrap(); + (read_all(reader, &progress_chan), progress_chan) + } + Ok(ReadStatus::EOF) | Err(_) => + (res.map(|_| ()), start_sending(start_chan, metadata)), + }; progress_chan.send(Done(res)).unwrap(); } Err(e) => { + let progress_chan = start_sending(start_chan, metadata); progress_chan.send(Done(Err(e.description().to_string()))).unwrap(); } } } Err(_) => { + let progress_chan = start_sending(start_chan, metadata); progress_chan.send(Done(Err(url.to_string()))).unwrap(); } } diff --git a/components/net/http_loader.rs b/components/net/http_loader.rs index cd984823bc8..de9c9829d25 100644 --- a/components/net/http_loader.rs +++ b/components/net/http_loader.rs @@ -5,6 +5,7 @@ use net_traits::{ControlMsg, CookieSource, LoadData, LoadResponse, Metadata}; use net_traits::ProgressMsg; use net_traits::ProgressMsg::{Payload, Done}; +use mime_classifier::MIMEClassifier; use resource_task::start_sending_opt; use log; @@ -21,6 +22,7 @@ use hyper::status::{StatusCode, StatusClass}; use std::error::Error; use openssl::ssl::{SslContext, SslVerifyMode}; use std::io::{self, Read, Write}; +use std::sync::Arc; use std::sync::mpsc::{Sender, channel}; use std::thunk::Invoke; use util::task::spawn_named; @@ -31,9 +33,9 @@ use url::{Url, UrlParser}; use std::borrow::ToOwned; pub fn factory(cookies_chan: Sender) - -> Box + Send> { - box move |(load_data,)| { - spawn_named("http_loader".to_owned(), move || load(load_data, cookies_chan)) + -> Box)> + Send> { + box move |(load_data, classifier)| { + spawn_named("http_loader".to_owned(), move || load(load_data, classifier, cookies_chan)) } } @@ -47,7 +49,7 @@ fn send_error(url: Url, err: String, start_chan: Sender) { }; } -fn load(mut load_data: LoadData, cookies_chan: Sender) { +fn load(mut load_data: LoadData, classifier: Arc, cookies_chan: Sender) { // FIXME: At the time of writing this FIXME, servo didn't have any central // location for configuration. If you're reading this and such a // repository DOES exist, please update this constant to use it. @@ -122,7 +124,7 @@ reason: \"certificate verify failed\" }]"; let mut image = resources_dir_path(); image.push("badcert.html"); let load_data = LoadData::new(Url::from_file_path(&*image).unwrap(), start_chan); - file_loader::factory(load_data); + file_loader::factory(load_data, classifier); return; }, Err(e) => { diff --git a/components/net/mime_classifier.rs b/components/net/mime_classifier.rs index 58ea4f09cfe..eaf7b47845b 100644 --- a/components/net/mime_classifier.rs +++ b/components/net/mime_classifier.rs @@ -315,13 +315,13 @@ impl MIMEChecker for BinaryOrPlaintextClassifier { } } struct GroupedClassifier { - byte_matchers: Vec>, + byte_matchers: Vec>, } impl GroupedClassifier { fn image_classifer() -> GroupedClassifier { GroupedClassifier { byte_matchers: vec![ - box ByteMatcher::image_x_icon() as Box, + box ByteMatcher::image_x_icon(), box ByteMatcher::image_x_icon_cursor(), box ByteMatcher::image_bmp(), box ByteMatcher::image_gif89a(), @@ -335,7 +335,7 @@ impl GroupedClassifier { fn audio_video_classifer() -> GroupedClassifier { GroupedClassifier{ byte_matchers: vec![ - box ByteMatcher::video_webm() as Box, + box ByteMatcher::video_webm(), box ByteMatcher::audio_basic(), box ByteMatcher::audio_aiff(), box ByteMatcher::audio_mpeg(), @@ -350,7 +350,7 @@ impl GroupedClassifier { fn scriptable_classifier() -> GroupedClassifier { GroupedClassifier{ byte_matchers: vec![ - box ByteMatcher::text_html_doctype() as Box, + box ByteMatcher::text_html_doctype(), box ByteMatcher::text_html_page(), box ByteMatcher::text_html_head(), box ByteMatcher::text_html_script(), @@ -376,7 +376,7 @@ impl GroupedClassifier { fn plaintext_classifier() -> GroupedClassifier { GroupedClassifier{ byte_matchers: vec![ - box ByteMatcher::text_plain_utf_8_bom() as Box, + box ByteMatcher::text_plain_utf_8_bom(), box ByteMatcher::text_plain_utf_16le_bom(), box ByteMatcher::text_plain_utf_16be_bom(), box ByteMatcher::application_postscript() @@ -386,7 +386,7 @@ impl GroupedClassifier { fn archive_classifier() -> GroupedClassifier { GroupedClassifier { byte_matchers: vec![ - box ByteMatcher::application_x_gzip() as Box, + box ByteMatcher::application_x_gzip(), box ByteMatcher::application_zip(), box ByteMatcher::application_x_rar_compressed() ] @@ -398,7 +398,7 @@ impl GroupedClassifier { fn font_classifier() -> GroupedClassifier { GroupedClassifier { byte_matchers: vec![ - box ByteMatcher::application_font_woff() as Box, + box ByteMatcher::application_font_woff(), box ByteMatcher::true_type_collection(), box ByteMatcher::open_type(), box ByteMatcher::true_type(), diff --git a/components/net/resource_task.rs b/components/net/resource_task.rs index 76146cb97e5..a63cf83a7af 100644 --- a/components/net/resource_task.rs +++ b/components/net/resource_task.rs @@ -10,10 +10,12 @@ use file_loader; use http_loader; use cookie_storage::CookieStorage; use cookie; +use mime_classifier::MIMEClassifier; use net_traits::{ControlMsg, LoadData, LoadResponse}; use net_traits::{Metadata, ProgressMsg, ResourceTask}; use net_traits::ProgressMsg::Done; +use util::opts; use util::task::spawn_named; use hyper::header::UserAgent; @@ -27,6 +29,7 @@ use std::collections::HashMap; use std::env; use std::fs::File; use std::io::{BufReader, Read}; +use std::sync::Arc; use std::sync::mpsc::{channel, Receiver, Sender}; use std::thunk::Invoke; @@ -61,6 +64,30 @@ pub fn start_sending(start_chan: Sender, metadata: Metadata) -> Se start_sending_opt(start_chan, metadata).ok().unwrap() } +/// For use by loaders in responding to a Load message that allows content sniffing. +pub fn start_sending_sniffed(start_chan: Sender, metadata: Metadata, + classifier: Arc, partial_body: &Vec) + -> Sender { + start_sending_sniffed_opt(start_chan, metadata, classifier, partial_body).ok().unwrap() +} + +/// For use by loaders in responding to a Load message that allows content sniffing. +pub fn start_sending_sniffed_opt(start_chan: Sender, mut metadata: Metadata, + classifier: Arc, partial_body: &Vec) + -> Result, ()> { + if opts::get().sniff_mime_types { + // TODO: should be calculated in the resource loader, from pull requeset #4094 + let nosniff = false; + let check_for_apache_bug = false; + + metadata.content_type = classifier.classify(nosniff, check_for_apache_bug, + &metadata.content_type, &partial_body); + + } + + start_sending_opt(start_chan, metadata) +} + /// For use by loaders in responding to a Load message. pub fn start_sending_opt(start_chan: Sender, metadata: Metadata) -> Result, ()> { let (progress_chan, progress_port) = channel(); @@ -123,6 +150,7 @@ struct ResourceManager { user_agent: Option, cookie_storage: CookieStorage, resource_task: Sender, + mime_classifier: Arc, } impl ResourceManager { @@ -133,6 +161,7 @@ impl ResourceManager { user_agent: user_agent, cookie_storage: CookieStorage::new(), resource_task: resource_task, + mime_classifier: Arc::new(MIMEClassifier::new()), } } } @@ -174,10 +203,10 @@ impl ResourceManager { self.user_agent.as_ref().map(|ua| load_data.headers.set(UserAgent(ua.clone()))); - fn from_factory(factory: fn(LoadData,)) - -> Box + Send> { - box move |(load_data,)| { - factory(load_data) + fn from_factory(factory: fn(LoadData, Arc)) + -> Box)> + Send> { + box move |(load_data, classifier)| { + factory(load_data, classifier) } } @@ -195,7 +224,7 @@ impl ResourceManager { }; debug!("resource_task: loading url: {}", load_data.url.serialize()); - loader.invoke((load_data,)); + loader.invoke((load_data, self.mime_classifier.clone())); } } diff --git a/components/net/sniffer_task.rs b/components/net/sniffer_task.rs deleted file mode 100644 index a5cdd2f69ff..00000000000 --- a/components/net/sniffer_task.rs +++ /dev/null @@ -1,117 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -//! A task that sniffs data -use std::sync::mpsc::{channel, Receiver, Sender}; -use std::thread::Builder; -use mime_classifier::MIMEClassifier; -use resource_task::{LoadResponse, LoadResponse, ProgressMsg}; - -pub type SnifferTask = Sender; - -pub fn new_sniffer_task() -> SnifferTask { - let(sen, rec) = channel(); - let builder = Builder::new().name("SnifferManager".to_string()); - builder.spawn(move || { - SnifferManager::new(rec).start(); - }).unwrap(); - sen -} - -struct SnifferManager { - data_receiver: Receiver, - mime_classifier: MIMEClassifier -} - -impl SnifferManager { - fn new(data_receiver: Receiver ) -> SnifferManager { - SnifferManager { - data_receiver: data_receiver, - mime_classifier: MIMEClassifier::new() - } - } -} - -impl SnifferManager { - fn start(self) { - - for mut snif_data in self.data_receiver.iter() { - // Read all the data - let mut resource_data = vec!(); - loop { - match snif_data.load_response.progress_port.recv().unwrap() { - ProgressMsg::Payload(data) => { - resource_data.push_all(data.as_slice()); - } - ProgressMsg::Done(res) => { - let (new_progress_chan, new_progress_port) = channel(); - - // TODO: should be calculated in the resource loader, from pull requeset #4094 - let nosniff = false; - let check_for_apache_bug = false; - - // We have all the data, go ahead and sniff it and replace the Content-Type - if res.is_ok() { - snif_data.load_response.metadata.content_type = self.mime_classifier.classify( - nosniff,check_for_apache_bug,&snif_data.load_response.metadata.content_type, - &resource_data - ); - } - let load_response = LoadResponse { - progress_port: new_progress_port, - metadata: snif_data.load_response.metadata, - }; - - if snif_data.consumer.send(load_response).is_err() { - break; - } - if resource_data.len() > 0 { - new_progress_chan.send(ProgressMsg::Payload(resource_data)).unwrap(); - } - new_progress_chan.send(ProgressMsg::Done(res)).unwrap(); - return; - } - } - } - } // end for - } -} - -#[cfg(test)] -pub fn new_mock_sniffer_task() -> SnifferTask { - let(sen, rec) = channel(); - let builder = TaskBuilder::new().named("SnifferManager"); - builder.spawn(move || { - MockSnifferManager::new(rec).start(); - }); - sen -} - -#[cfg(test)] -struct MockSnifferManager { - data_receiver: Receiver, -} - -#[cfg(test)] -impl MockSnifferManager { - fn new(data_receiver: Receiver ) -> MockSnifferManager { - MockSnifferManager { - data_receiver: data_receiver, - } - } -} - -#[cfg(test)] -impl MockSnifferManager { - fn start(self) { - loop { - match self.data_receiver.recv() { - Ok(snif_data) => { - let _ = snif_data.consumer.send(snif_data.load_response); - } - Err(_) => break, - } - } - } -} diff --git a/components/util/opts.rs b/components/util/opts.rs index f445359407e..04a074adbeb 100644 --- a/components/util/opts.rs +++ b/components/util/opts.rs @@ -128,6 +128,9 @@ pub struct Opts { /// A specific path to find required resources (such as user-agent.css). pub resources_path: Option, + + /// Whether MIME sniffing should be used + pub sniff_mime_types: bool, } fn print_usage(app: &str, opts: &[getopts::OptGroup]) { @@ -207,6 +210,7 @@ pub fn default_opts() -> Opts { validate_display_list_geometry: false, profile_tasks: false, resources_path: None, + sniff_mime_types: false, } } @@ -238,6 +242,7 @@ pub fn from_cmdline_args(args: &[String]) -> bool { getopts::optflag("h", "help", "Print this message"), getopts::optopt("r", "render-api", "Set the rendering API to use", "gl|mesa"), getopts::optopt("", "resources-path", "Path to find static resources", "/home/servo/resources"), + getopts::optflag("", "sniff-mime-types" , "Enable MIME sniffing"), ); let opt_match = match getopts::getopts(args, opts.as_slice()) { @@ -360,6 +365,7 @@ pub fn from_cmdline_args(args: &[String]) -> bool { relayout_event: debug_options.contains(&"relayout-event"), validate_display_list_geometry: debug_options.contains(&"validate-display-list-geometry"), resources_path: opt_match.opt_str("resources-path"), + sniff_mime_types: opt_match.opt_present("sniff-mime-types"), }; set_opts(opts);