Auto merge of #5005 - jdm:mime-sniffing, r=jdm

This rebases and integrates #4209, removes the sniffer task (it turned out not to be a great idea), and adds a `--sniff-mime-types` command-line flag to enable sniffing for file:// and http:// resources. Tested against a random picture file on my hard drive. The actual MIME sniffing implementation can later be extracted into a separate library.
This commit is contained in:
bors-servo 2015-04-07 12:48:06 -05:00
commit c7e210f24c
112 changed files with 2127 additions and 206 deletions

View file

@ -4,7 +4,8 @@
use net_traits::{LoadData, Metadata}; use net_traits::{LoadData, Metadata};
use net_traits::ProgressMsg::Done; use net_traits::ProgressMsg::Done;
use resource_task::{TargetedLoadResponse, start_sending, ResponseSenders}; use mime_classifier::MIMEClassifier;
use resource_task::start_sending;
use file_loader; use file_loader;
use url::Url; use url::Url;
@ -13,16 +14,13 @@ use util::resource_files::resources_dir_path;
use std::borrow::IntoCow; use std::borrow::IntoCow;
use std::fs::PathExt; use std::fs::PathExt;
use std::sync::mpsc::Sender; use std::sync::Arc;
pub fn factory(mut load_data: LoadData, start_chan: Sender<TargetedLoadResponse>) { pub fn factory(mut load_data: LoadData, classifier: Arc<MIMEClassifier>) {
let senders = ResponseSenders {
immediate_consumer: start_chan.clone(),
eventual_consumer: load_data.consumer.clone(),
};
match load_data.url.non_relative_scheme_data().unwrap() { match load_data.url.non_relative_scheme_data().unwrap() {
"blank" => { "blank" => {
let chan = start_sending(senders, Metadata { let start_chan = load_data.consumer;
let chan = start_sending(start_chan, Metadata {
final_url: load_data.url, final_url: load_data.url,
content_type: Some(("text".to_string(), "html".to_string())), content_type: Some(("text".to_string(), "html".to_string())),
charset: Some("utf-8".to_string()), charset: Some("utf-8".to_string()),
@ -40,10 +38,11 @@ pub fn factory(mut load_data: LoadData, start_chan: Sender<TargetedLoadResponse>
load_data.url = Url::from_file_path(&*path).unwrap(); load_data.url = Url::from_file_path(&*path).unwrap();
} }
_ => { _ => {
start_sending(senders, Metadata::default(load_data.url)) let start_chan = load_data.consumer;
start_sending(start_chan, Metadata::default(load_data.url))
.send(Done(Err("Unknown about: URL.".to_string()))).unwrap(); .send(Done(Err("Unknown about: URL.".to_string()))).unwrap();
return return
} }
}; };
file_loader::factory(load_data, start_chan) file_loader::factory(load_data, classifier)
} }

View file

@ -4,34 +4,30 @@
use net_traits::{LoadData, Metadata}; use net_traits::{LoadData, Metadata};
use net_traits::ProgressMsg::{Payload, Done}; use net_traits::ProgressMsg::{Payload, Done};
use resource_task::{TargetedLoadResponse, start_sending, ResponseSenders}; use mime_classifier::MIMEClassifier;
use resource_task::start_sending;
use rustc_serialize::base64::FromBase64; use rustc_serialize::base64::FromBase64;
use hyper::mime::Mime; use hyper::mime::Mime;
use std::sync::Arc;
use url::{percent_decode, SchemeData}; use url::{percent_decode, SchemeData};
use std::sync::mpsc::Sender; pub fn factory(load_data: LoadData, _classifier: Arc<MIMEClassifier>) {
pub fn factory(load_data: LoadData, start_chan: Sender<TargetedLoadResponse>) {
// NB: we don't spawn a new task. // NB: we don't spawn a new task.
// Hypothesis: data URLs are too small for parallel base64 etc. to be worth it. // Hypothesis: data URLs are too small for parallel base64 etc. to be worth it.
// Should be tested at some point. // Should be tested at some point.
// Left in separate function to allow easy moving to a task, if desired. // Left in separate function to allow easy moving to a task, if desired.
load(load_data, start_chan) load(load_data)
} }
fn load(load_data: LoadData, start_chan: Sender<TargetedLoadResponse>) { fn load(load_data: LoadData) {
let start_chan = load_data.consumer;
let url = load_data.url; let url = load_data.url;
assert!(&*url.scheme == "data"); assert!(&*url.scheme == "data");
let mut metadata = Metadata::default(url.clone()); let mut metadata = Metadata::default(url.clone());
let senders = ResponseSenders {
immediate_consumer: start_chan,
eventual_consumer: load_data.consumer,
};
// Split out content type and data. // Split out content type and data.
let mut scheme_data = match url.scheme_data { let mut scheme_data = match url.scheme_data {
SchemeData::NonRelative(scheme_data) => scheme_data, SchemeData::NonRelative(scheme_data) => scheme_data,
@ -46,7 +42,7 @@ fn load(load_data: LoadData, start_chan: Sender<TargetedLoadResponse>) {
} }
let parts: Vec<&str> = scheme_data.splitn(1, ',').collect(); let parts: Vec<&str> = scheme_data.splitn(1, ',').collect();
if parts.len() != 2 { if parts.len() != 2 {
start_sending(senders, metadata).send(Done(Err("invalid data uri".to_string()))).unwrap(); start_sending(start_chan, metadata).send(Done(Err("invalid data uri".to_string()))).unwrap();
return; return;
} }
@ -64,7 +60,7 @@ fn load(load_data: LoadData, start_chan: Sender<TargetedLoadResponse>) {
let content_type: Option<Mime> = ct_str.parse().ok(); let content_type: Option<Mime> = ct_str.parse().ok();
metadata.set_content_type(content_type.as_ref()); metadata.set_content_type(content_type.as_ref());
let progress_chan = start_sending(senders, metadata); let progress_chan = start_sending(start_chan, metadata);
let bytes = percent_decode(parts[1].as_bytes()); let bytes = percent_decode(parts[1].as_bytes());
if is_base64 { if is_base64 {
@ -93,11 +89,9 @@ fn assert_parse(url: &'static str,
data: Option<Vec<u8>>) { data: Option<Vec<u8>>) {
use std::sync::mpsc::channel; use std::sync::mpsc::channel;
use url::Url; use url::Url;
use sniffer_task;
let (start_chan, start_port) = channel(); let (start_chan, start_port) = channel();
let sniffer_task = sniffer_task::new_sniffer_task(); load(LoadData::new(Url::parse(url).unwrap(), start_chan));
load(LoadData::new(Url::parse(url).unwrap(), start_chan), sniffer_task);
let response = start_port.recv().unwrap(); let response = start_port.recv().unwrap();
assert_eq!(&response.metadata.content_type, &content_type); assert_eq!(&response.metadata.content_type, &content_type);

View file

@ -4,55 +4,78 @@
use net_traits::{LoadData, Metadata, ProgressMsg}; use net_traits::{LoadData, Metadata, ProgressMsg};
use net_traits::ProgressMsg::{Payload, Done}; use net_traits::ProgressMsg::{Payload, Done};
use resource_task::{start_sending, TargetedLoadResponse, ResponseSenders}; use mime_classifier::MIMEClassifier;
use resource_task::{start_sending, start_sending_sniffed};
use std::borrow::ToOwned; use std::borrow::ToOwned;
use std::io; use std::io;
use std::fs::File; use std::fs::File;
use std::path::PathBuf; use std::path::PathBuf;
use std::sync::Arc;
use std::sync::mpsc::Sender; use std::sync::mpsc::Sender;
use util::task::spawn_named; use util::task::spawn_named;
static READ_SIZE: uint = 8192; static READ_SIZE: uint = 8192;
/// Outcome of a single successful `read_block` call.
enum ReadStatus {
/// A non-empty chunk of bytes, truncated to the number of bytes actually read.
Partial(Vec<u8>),
/// The reader returned zero bytes.
EOF,
}
/// Pulls at most `READ_SIZE` bytes out of `reader` in a single `read` call.
///
/// Yields `ReadStatus::Partial` with exactly the bytes obtained,
/// `ReadStatus::EOF` when the read produced nothing, or the I/O error's
/// description as a `String` on failure.
fn read_block(reader: &mut io::Read) -> Result<ReadStatus, String> {
    let mut block = vec![0; READ_SIZE];
    let count = match reader.read(block.as_mut_slice()) {
        Ok(count) => count,
        Err(e) => return Err(e.description().to_string()),
    };
    if count == 0 {
        return Ok(ReadStatus::EOF);
    }
    // Drop the unused tail so the payload holds only real data.
    block.truncate(count);
    Ok(ReadStatus::Partial(block))
}
fn read_all(reader: &mut io::Read, progress_chan: &Sender<ProgressMsg>) fn read_all(reader: &mut io::Read, progress_chan: &Sender<ProgressMsg>)
-> Result<(), String> { -> Result<(), String> {
loop { loop {
let mut buf = vec![0; READ_SIZE]; match try!(read_block(reader)) {
match reader.read(buf.as_mut_slice()) { ReadStatus::Partial(buf) => progress_chan.send(Payload(buf)).unwrap(),
Ok(0) => return Ok(()), ReadStatus::EOF => return Ok(()),
Ok(n) => {
buf.truncate(n);
progress_chan.send(Payload(buf)).unwrap();
},
Err(e) => return Err(e.description().to_string()),
} }
} }
} }
pub fn factory(load_data: LoadData, start_chan: Sender<TargetedLoadResponse>) { pub fn factory(load_data: LoadData, classifier: Arc<MIMEClassifier>) {
let url = load_data.url; let url = load_data.url;
let start_chan = load_data.consumer;
assert!(&*url.scheme == "file"); assert!(&*url.scheme == "file");
let senders = ResponseSenders {
immediate_consumer: start_chan,
eventual_consumer: load_data.consumer,
};
let progress_chan = start_sending(senders, Metadata::default(url.clone()));
spawn_named("file_loader".to_owned(), move || { spawn_named("file_loader".to_owned(), move || {
let metadata = Metadata::default(url.clone());
let file_path: Result<PathBuf, ()> = url.to_file_path(); let file_path: Result<PathBuf, ()> = url.to_file_path();
match file_path { match file_path {
Ok(file_path) => { Ok(file_path) => {
match File::open(&file_path) { match File::open(&file_path) {
Ok(ref mut reader) => { Ok(ref mut reader) => {
let res = read_all(reader, &progress_chan); let res = read_block(reader);
let (res, progress_chan) = match res {
Ok(ReadStatus::Partial(buf)) => {
let progress_chan = start_sending_sniffed(start_chan, metadata,
classifier, &buf);
progress_chan.send(Payload(buf)).unwrap();
(read_all(reader, &progress_chan), progress_chan)
}
Ok(ReadStatus::EOF) | Err(_) =>
(res.map(|_| ()), start_sending(start_chan, metadata)),
};
progress_chan.send(Done(res)).unwrap(); progress_chan.send(Done(res)).unwrap();
} }
Err(e) => { Err(e) => {
let progress_chan = start_sending(start_chan, metadata);
progress_chan.send(Done(Err(e.description().to_string()))).unwrap(); progress_chan.send(Done(Err(e.description().to_string()))).unwrap();
} }
} }
} }
Err(_) => { Err(_) => {
let progress_chan = start_sending(start_chan, metadata);
progress_chan.send(Done(Err(url.to_string()))).unwrap(); progress_chan.send(Done(Err(url.to_string()))).unwrap();
} }
} }

View file

@ -2,10 +2,10 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use net_traits::{ControlMsg, CookieSource, LoadData, Metadata}; use net_traits::{ControlMsg, CookieSource, LoadData, LoadResponse, Metadata};
use net_traits::ProgressMsg;
use net_traits::ProgressMsg::{Payload, Done}; use net_traits::ProgressMsg::{Payload, Done};
use resource_task::{TargetedLoadResponse, start_sending_opt, ResponseSenders}; use mime_classifier::MIMEClassifier;
use resource_task::{start_sending_opt, start_sending_sniffed_opt};
use log; use log;
use std::collections::HashSet; use std::collections::HashSet;
@ -21,6 +21,7 @@ use hyper::status::{StatusCode, StatusClass};
use std::error::Error; use std::error::Error;
use openssl::ssl::{SslContext, SslVerifyMode}; use openssl::ssl::{SslContext, SslVerifyMode};
use std::io::{self, Read, Write}; use std::io::{self, Read, Write};
use std::sync::Arc;
use std::sync::mpsc::{Sender, channel}; use std::sync::mpsc::{Sender, channel};
use std::thunk::Invoke; use std::thunk::Invoke;
use util::task::spawn_named; use util::task::spawn_named;
@ -31,36 +32,50 @@ use url::{Url, UrlParser};
use std::borrow::ToOwned; use std::borrow::ToOwned;
pub fn factory(cookies_chan: Sender<ControlMsg>) pub fn factory(cookies_chan: Sender<ControlMsg>)
-> Box<Invoke<(LoadData, Sender<TargetedLoadResponse>)> + Send> { -> Box<Invoke<(LoadData, Arc<MIMEClassifier>)> + Send> {
box move |(load_data, start_chan)| { box move |(load_data, classifier)| {
spawn_named("http_loader".to_owned(), move || load(load_data, start_chan, cookies_chan)) spawn_named("http_loader".to_owned(), move || load(load_data, classifier, cookies_chan))
} }
} }
fn send_error(url: Url, err: String, senders: ResponseSenders) { fn send_error(url: Url, err: String, start_chan: Sender<LoadResponse>) {
let mut metadata: Metadata = Metadata::default(url); let mut metadata: Metadata = Metadata::default(url);
metadata.status = None; metadata.status = None;
match start_sending_opt(senders, metadata) { match start_sending_opt(start_chan, metadata) {
Ok(p) => p.send(Done(Err(err))).unwrap(), Ok(p) => p.send(Done(Err(err))).unwrap(),
_ => {} _ => {}
}; };
} }
fn load(mut load_data: LoadData, start_chan: Sender<TargetedLoadResponse>, cookies_chan: Sender<ControlMsg>) { enum ReadResult {
Payload(Vec<u8>),
EOF,
}
/// Reads a single block of up to 1024 bytes from `reader`.
///
/// Returns `Ok(ReadResult::Payload(buf))` holding exactly the bytes read when
/// the read yields at least one byte, `Ok(ReadResult::EOF)` when it yields
/// zero bytes, and `Err(())` when the read fails (the underlying I/O error is
/// discarded).
fn read_block<R: Read>(reader: &mut R) -> Result<ReadResult, ()> {
    let mut buf = vec![0; 1024];
    match reader.read(buf.as_mut_slice()) {
        Ok(len) if len > 0 => {
            // The buffer is fully initialized and `len` cannot exceed its
            // length, so a safe `truncate` replaces the `unsafe` `set_len`.
            buf.truncate(len);
            Ok(ReadResult::Payload(buf))
        }
        Ok(_) => Ok(ReadResult::EOF),
        Err(_) => Err(()),
    }
}
fn load(mut load_data: LoadData, classifier: Arc<MIMEClassifier>, cookies_chan: Sender<ControlMsg>) {
// FIXME: At the time of writing this FIXME, servo didn't have any central // FIXME: At the time of writing this FIXME, servo didn't have any central
// location for configuration. If you're reading this and such a // location for configuration. If you're reading this and such a
// repository DOES exist, please update this constant to use it. // repository DOES exist, please update this constant to use it.
let max_redirects = 50u; let max_redirects = 50u;
let mut iters = 0u; let mut iters = 0u;
let start_chan = load_data.consumer;
let mut url = load_data.url.clone(); let mut url = load_data.url.clone();
let mut redirected_to = HashSet::new(); let mut redirected_to = HashSet::new();
let senders = ResponseSenders {
immediate_consumer: start_chan,
eventual_consumer: load_data.consumer
};
// If the URL is a view-source scheme then the scheme data contains the // If the URL is a view-source scheme then the scheme data contains the
// real URL that should be used for which the source is to be viewed. // real URL that should be used for which the source is to be viewed.
// Change our existing URL to that and keep note that we are viewing // Change our existing URL to that and keep note that we are viewing
@ -73,7 +88,7 @@ fn load(mut load_data: LoadData, start_chan: Sender<TargetedLoadResponse>, cooki
"http" | "https" => {} "http" | "https" => {}
_ => { _ => {
let s = format!("The {} scheme with view-source is not supported", url.scheme); let s = format!("The {} scheme with view-source is not supported", url.scheme);
send_error(url, s, senders); send_error(url, s, start_chan);
return; return;
} }
}; };
@ -84,7 +99,7 @@ fn load(mut load_data: LoadData, start_chan: Sender<TargetedLoadResponse>, cooki
iters = iters + 1; iters = iters + 1;
if iters > max_redirects { if iters > max_redirects {
send_error(url, "too many redirects".to_string(), senders); send_error(url, "too many redirects".to_string(), start_chan);
return; return;
} }
@ -92,7 +107,7 @@ fn load(mut load_data: LoadData, start_chan: Sender<TargetedLoadResponse>, cooki
"http" | "https" => {} "http" | "https" => {}
_ => { _ => {
let s = format!("{} request, but we don't support that scheme", url.scheme); let s = format!("{} request, but we don't support that scheme", url.scheme);
send_error(url, s, senders); send_error(url, s, start_chan);
return; return;
} }
} }
@ -125,13 +140,13 @@ reason: \"certificate verify failed\" }]";
) => { ) => {
let mut image = resources_dir_path(); let mut image = resources_dir_path();
image.push("badcert.html"); image.push("badcert.html");
let load_data = LoadData::new(Url::from_file_path(&*image).unwrap(), senders.eventual_consumer); let load_data = LoadData::new(Url::from_file_path(&*image).unwrap(), start_chan);
file_loader::factory(load_data, senders.immediate_consumer); file_loader::factory(load_data, classifier);
return; return;
}, },
Err(e) => { Err(e) => {
println!("{:?}", e); println!("{:?}", e);
send_error(url, e.description().to_string(), senders); send_error(url, e.description().to_string(), start_chan);
return; return;
} }
}; };
@ -179,13 +194,13 @@ reason: \"certificate verify failed\" }]";
let mut writer = match req.start() { let mut writer = match req.start() {
Ok(w) => w, Ok(w) => w,
Err(e) => { Err(e) => {
send_error(url, e.description().to_string(), senders); send_error(url, e.description().to_string(), start_chan);
return; return;
} }
}; };
match writer.write_all(&*data) { match writer.write_all(&*data) {
Err(e) => { Err(e) => {
send_error(url, e.description().to_string(), senders); send_error(url, e.description().to_string(), start_chan);
return; return;
} }
_ => {} _ => {}
@ -200,7 +215,7 @@ reason: \"certificate verify failed\" }]";
match req.start() { match req.start() {
Ok(w) => w, Ok(w) => w,
Err(e) => { Err(e) => {
send_error(url, e.description().to_string(), senders); send_error(url, e.description().to_string(), start_chan);
return; return;
} }
} }
@ -209,7 +224,7 @@ reason: \"certificate verify failed\" }]";
let mut response = match writer.send() { let mut response = match writer.send() {
Ok(r) => r, Ok(r) => r,
Err(e) => { Err(e) => {
send_error(url, e.description().to_string(), senders); send_error(url, e.description().to_string(), start_chan);
return; return;
} }
}; };
@ -240,7 +255,7 @@ reason: \"certificate verify failed\" }]";
Some(ref c) => { Some(ref c) => {
if c.preflight { if c.preflight {
// The preflight lied // The preflight lied
send_error(url, "Preflight fetch inconsistent with main fetch".to_string(), senders); send_error(url, "Preflight fetch inconsistent with main fetch".to_string(), start_chan);
return; return;
} else { } else {
// XXXManishearth There are some CORS-related steps here, // XXXManishearth There are some CORS-related steps here,
@ -252,7 +267,7 @@ reason: \"certificate verify failed\" }]";
let new_url = match UrlParser::new().base_url(&url).parse(&new_url) { let new_url = match UrlParser::new().base_url(&url).parse(&new_url) {
Ok(u) => u, Ok(u) => u,
Err(e) => { Err(e) => {
send_error(url, e.to_string(), senders); send_error(url, e.to_string(), start_chan);
return; return;
} }
}; };
@ -268,7 +283,7 @@ reason: \"certificate verify failed\" }]";
} }
if redirected_to.contains(&url) { if redirected_to.contains(&url) {
send_error(url, "redirect loop".to_string(), senders); send_error(url, "redirect loop".to_string(), start_chan);
return; return;
} }
@ -291,11 +306,6 @@ reason: \"certificate verify failed\" }]";
metadata.headers = Some(adjusted_headers); metadata.headers = Some(adjusted_headers);
metadata.status = Some(response.status_raw().clone()); metadata.status = Some(response.status_raw().clone());
let progress_chan = match start_sending_opt(senders, metadata) {
Ok(p) => p,
_ => return
};
let mut encoding_str: Option<String> = None; let mut encoding_str: Option<String> = None;
//FIXME: Implement Content-Encoding Header https://github.com/hyperium/hyper/issues/391 //FIXME: Implement Content-Encoding Header https://github.com/hyperium/hyper/issues/391
if let Some(encodings) = response.headers.get_raw("content-encoding") { if let Some(encodings) = response.headers.get_raw("content-encoding") {
@ -313,14 +323,14 @@ reason: \"certificate verify failed\" }]";
Some(encoding) => { Some(encoding) => {
if encoding == "gzip" { if encoding == "gzip" {
let mut response_decoding = GzDecoder::new(response).unwrap(); let mut response_decoding = GzDecoder::new(response).unwrap();
send_data(&mut response_decoding, progress_chan); send_data(&mut response_decoding, start_chan, metadata, classifier);
} else if encoding == "deflate" { } else if encoding == "deflate" {
let mut response_decoding = DeflateDecoder::new(response); let mut response_decoding = DeflateDecoder::new(response);
send_data(&mut response_decoding, progress_chan); send_data(&mut response_decoding, start_chan, metadata, classifier);
} }
}, },
None => { None => {
send_data(&mut response, progress_chan); send_data(&mut response, start_chan, metadata, classifier);
} }
} }
@ -329,25 +339,35 @@ reason: \"certificate verify failed\" }]";
} }
} }
fn send_data<R: Read>(reader: &mut R, progress_chan: Sender<ProgressMsg>) { fn send_data<R: Read>(reader: &mut R,
loop { start_chan: Sender<LoadResponse>,
let mut buf = Vec::with_capacity(1024); metadata: Metadata,
classifier: Arc<MIMEClassifier>) {
let (progress_chan, mut chunk) = {
let buf = match read_block(reader) {
Ok(ReadResult::Payload(buf)) => buf,
_ => vec!(),
};
let p = match start_sending_sniffed_opt(start_chan, metadata, classifier, &buf) {
Ok(p) => p,
_ => return
};
(p, buf)
};
unsafe { buf.set_len(1024); } loop {
match reader.read(buf.as_mut_slice()) { if progress_chan.send(Payload(chunk)).is_err() {
Ok(len) if len > 0 => {
unsafe { buf.set_len(len); }
if progress_chan.send(Payload(buf)).is_err() {
// The send errors when the receiver is out of scope, // The send errors when the receiver is out of scope,
// which will happen if the fetch has timed out (or has been aborted) // which will happen if the fetch has timed out (or has been aborted)
// so we don't need to continue with the loading of the file here. // so we don't need to continue with the loading of the file here.
return; return;
} }
chunk = match read_block(reader) {
Ok(ReadResult::Payload(buf)) => buf,
Ok(ReadResult::EOF) | Err(_) => break,
};
} }
Ok(_) | Err(_) => {
let _ = progress_chan.send(Done(Ok(()))); let _ = progress_chan.send(Done(Ok(())));
break;
}
}
}
} }

View file

@ -428,11 +428,10 @@ mod tests {
use net_traits::image_cache_task::ImageResponseMsg::*; use net_traits::image_cache_task::ImageResponseMsg::*;
use net_traits::image_cache_task::Msg::*; use net_traits::image_cache_task::Msg::*;
use resource_task::{start_sending, ResponseSenders}; use resource_task::start_sending;
use net_traits::{ControlMsg, Metadata, ProgressMsg, ResourceTask}; use net_traits::{ControlMsg, Metadata, ProgressMsg, ResourceTask};
use net_traits::image_cache_task::{ImageCacheTask, ImageResponseMsg, Msg}; use net_traits::image_cache_task::{ImageCacheTask, ImageResponseMsg, Msg};
use net_traits::ProgressMsg::{Payload, Done}; use net_traits::ProgressMsg::{Payload, Done};
use sniffer_task;
use profile::time; use profile::time;
use std::sync::mpsc::{Sender, channel, Receiver}; use std::sync::mpsc::{Sender, channel, Receiver};
use url::Url; use url::Url;
@ -534,12 +533,7 @@ mod tests {
loop { loop {
match port.recv().unwrap() { match port.recv().unwrap() {
ControlMsg::Load(response) => { ControlMsg::Load(response) => {
let sniffer_task = sniffer_task::new_sniffer_task(); let chan = start_sending(response.consumer, Metadata::default(
let senders = ResponseSenders {
immediate_consumer: sniffer_task,
eventual_consumer: response.consumer.clone(),
};
let chan = start_sending(senders, Metadata::default(
Url::parse("file:///fake").unwrap())); Url::parse("file:///fake").unwrap()));
on_load.invoke(chan); on_load.invoke(chan);
} }
@ -709,12 +703,7 @@ mod tests {
loop { loop {
match port.recv().unwrap() { match port.recv().unwrap() {
ControlMsg::Load(response) => { ControlMsg::Load(response) => {
let sniffer_task = sniffer_task::new_sniffer_task(); let chan = start_sending(response.consumer, Metadata::default(
let senders = ResponseSenders {
immediate_consumer: sniffer_task,
eventual_consumer: response.consumer.clone(),
};
let chan = start_sending(senders, Metadata::default(
Url::parse("file:///fake").unwrap())); Url::parse("file:///fake").unwrap()));
chan.send(Payload(test_image_bin())); chan.send(Payload(test_image_bin()));
chan.send(Done(Ok(()))); chan.send(Done(Ok(())));
@ -763,12 +752,7 @@ mod tests {
loop { loop {
match port.recv().unwrap() { match port.recv().unwrap() {
ControlMsg::Load(response) => { ControlMsg::Load(response) => {
let sniffer_task = sniffer_task::new_sniffer_task(); let chan = start_sending(response.consumer, Metadata::default(
let senders = ResponseSenders {
immediate_consumer: sniffer_task,
eventual_consumer: response.consumer.clone(),
};
let chan = start_sending(senders, Metadata::default(
Url::parse("file:///fake").unwrap())); Url::parse("file:///fake").unwrap()));
chan.send(Payload(test_image_bin())); chan.send(Payload(test_image_bin()));
chan.send(Done(Err("".to_string()))); chan.send(Done(Err("".to_string())));

View file

@ -46,7 +46,7 @@ pub mod image_cache_task;
pub mod pub_domains; pub mod pub_domains;
pub mod resource_task; pub mod resource_task;
pub mod storage_task; pub mod storage_task;
mod sniffer_task; mod mime_classifier;
/// An implementation of the [Fetch spec](http://fetch.spec.whatwg.org/) /// An implementation of the [Fetch spec](http://fetch.spec.whatwg.org/)
pub mod fetch { pub mod fetch {

File diff suppressed because it is too large Load diff

View file

@ -8,14 +8,14 @@ use about_loader;
use data_loader; use data_loader;
use file_loader; use file_loader;
use http_loader; use http_loader;
use sniffer_task;
use sniffer_task::SnifferTask;
use cookie_storage::CookieStorage; use cookie_storage::CookieStorage;
use cookie; use cookie;
use mime_classifier::MIMEClassifier;
use net_traits::{ControlMsg, LoadData, LoadResponse}; use net_traits::{ControlMsg, LoadData, LoadResponse};
use net_traits::{Metadata, ProgressMsg, ResourceTask}; use net_traits::{Metadata, ProgressMsg, ResourceTask};
use net_traits::ProgressMsg::Done; use net_traits::ProgressMsg::Done;
use util::opts;
use util::task::spawn_named; use util::task::spawn_named;
use hyper::header::UserAgent; use hyper::header::UserAgent;
@ -29,6 +29,7 @@ use std::collections::HashMap;
use std::env; use std::env;
use std::fs::File; use std::fs::File;
use std::io::{BufReader, Read}; use std::io::{BufReader, Read};
use std::sync::Arc;
use std::sync::mpsc::{channel, Receiver, Sender}; use std::sync::mpsc::{channel, Receiver, Sender};
use std::thunk::Invoke; use std::thunk::Invoke;
@ -58,32 +59,41 @@ pub fn global_init() {
} }
} }
/// A LoadResponse directed at a particular consumer /// For use by loaders in responding to a Load message.
pub struct TargetedLoadResponse { pub fn start_sending(start_chan: Sender<LoadResponse>, metadata: Metadata) -> Sender<ProgressMsg> {
pub load_response: LoadResponse, start_sending_opt(start_chan, metadata).ok().unwrap()
pub consumer: Sender<LoadResponse>,
} }
// Data structure containing ports /// For use by loaders in responding to a Load message that allows content sniffing.
pub struct ResponseSenders { pub fn start_sending_sniffed(start_chan: Sender<LoadResponse>, metadata: Metadata,
pub immediate_consumer: Sender<TargetedLoadResponse>, classifier: Arc<MIMEClassifier>, partial_body: &Vec<u8>)
pub eventual_consumer: Sender<LoadResponse>, -> Sender<ProgressMsg> {
start_sending_sniffed_opt(start_chan, metadata, classifier, partial_body).ok().unwrap()
}
/// For use by loaders in responding to a Load message that allows content sniffing.
///
/// When the `sniff_mime_types` option is enabled, `metadata.content_type` is replaced by the
/// classifier's result for `partial_body` before the response is started; otherwise the
/// metadata is forwarded unchanged. Returns whatever `start_sending_opt` returns.
pub fn start_sending_sniffed_opt(start_chan: Sender<LoadResponse>, mut metadata: Metadata,
classifier: Arc<MIMEClassifier>, partial_body: &Vec<u8>)
-> Result<Sender<ProgressMsg>, ()> {
if opts::get().sniff_mime_types {
// TODO: should be calculated in the resource loader, from pull request #4094
let nosniff = false;
let check_for_apache_bug = false;
metadata.content_type = classifier.classify(nosniff, check_for_apache_bug,
&metadata.content_type, &partial_body);
}
start_sending_opt(start_chan, metadata)
}
/// For use by loaders in responding to a Load message. /// For use by loaders in responding to a Load message.
pub fn start_sending(senders: ResponseSenders, metadata: Metadata) -> Sender<ProgressMsg> { pub fn start_sending_opt(start_chan: Sender<LoadResponse>, metadata: Metadata) -> Result<Sender<ProgressMsg>, ()> {
start_sending_opt(senders, metadata).ok().unwrap()
}
/// For use by loaders in responding to a Load message.
pub fn start_sending_opt(senders: ResponseSenders, metadata: Metadata) -> Result<Sender<ProgressMsg>, ()> {
let (progress_chan, progress_port) = channel(); let (progress_chan, progress_port) = channel();
let result = senders.immediate_consumer.send(TargetedLoadResponse { let result = start_chan.send(LoadResponse {
load_response: LoadResponse {
metadata: metadata, metadata: metadata,
progress_port: progress_port, progress_port: progress_port,
},
consumer: senders.eventual_consumer
}); });
match result { match result {
Ok(_) => Ok(progress_chan), Ok(_) => Ok(progress_chan),
@ -94,10 +104,9 @@ pub fn start_sending_opt(senders: ResponseSenders, metadata: Metadata) -> Result
/// Create a ResourceTask /// Create a ResourceTask
pub fn new_resource_task(user_agent: Option<String>) -> ResourceTask { pub fn new_resource_task(user_agent: Option<String>) -> ResourceTask {
let (setup_chan, setup_port) = channel(); let (setup_chan, setup_port) = channel();
let sniffer_task = sniffer_task::new_sniffer_task();
let setup_chan_clone = setup_chan.clone(); let setup_chan_clone = setup_chan.clone();
spawn_named("ResourceManager".to_owned(), move || { spawn_named("ResourceManager".to_owned(), move || {
ResourceManager::new(setup_port, user_agent, sniffer_task, setup_chan_clone).start(); ResourceManager::new(setup_port, user_agent, setup_chan_clone).start();
}); });
setup_chan setup_chan
} }
@ -139,20 +148,20 @@ pub fn replace_hosts(mut load_data: LoadData, host_table: *mut HashMap<String, S
struct ResourceManager { struct ResourceManager {
from_client: Receiver<ControlMsg>, from_client: Receiver<ControlMsg>,
user_agent: Option<String>, user_agent: Option<String>,
sniffer_task: SnifferTask,
cookie_storage: CookieStorage, cookie_storage: CookieStorage,
resource_task: Sender<ControlMsg>, resource_task: Sender<ControlMsg>,
mime_classifier: Arc<MIMEClassifier>,
} }
impl ResourceManager { impl ResourceManager {
fn new(from_client: Receiver<ControlMsg>, user_agent: Option<String>, sniffer_task: SnifferTask, fn new(from_client: Receiver<ControlMsg>, user_agent: Option<String>,
resource_task: Sender<ControlMsg>) -> ResourceManager { resource_task: Sender<ControlMsg>) -> ResourceManager {
ResourceManager { ResourceManager {
from_client: from_client, from_client: from_client,
user_agent: user_agent, user_agent: user_agent,
sniffer_task: sniffer_task,
cookie_storage: CookieStorage::new(), cookie_storage: CookieStorage::new(),
resource_task: resource_task, resource_task: resource_task,
mime_classifier: Arc::new(MIMEClassifier::new()),
} }
} }
} }
@ -193,15 +202,11 @@ impl ResourceManager {
} }
self.user_agent.as_ref().map(|ua| load_data.headers.set(UserAgent(ua.clone()))); self.user_agent.as_ref().map(|ua| load_data.headers.set(UserAgent(ua.clone())));
let senders = ResponseSenders {
immediate_consumer: self.sniffer_task.clone(),
eventual_consumer: load_data.consumer.clone(),
};
fn from_factory(factory: fn(LoadData, Sender<TargetedLoadResponse>)) fn from_factory(factory: fn(LoadData, Arc<MIMEClassifier>))
-> Box<Invoke<(LoadData, Sender<TargetedLoadResponse>)> + Send> { -> Box<Invoke<(LoadData, Arc<MIMEClassifier>)> + Send> {
box move |(load_data, start_chan)| { box move |(load_data, classifier)| {
factory(load_data, start_chan) factory(load_data, classifier)
} }
} }
@ -212,14 +217,14 @@ impl ResourceManager {
"about" => from_factory(about_loader::factory), "about" => from_factory(about_loader::factory),
_ => { _ => {
debug!("resource_task: no loader for scheme {}", load_data.url.scheme); debug!("resource_task: no loader for scheme {}", load_data.url.scheme);
start_sending(senders, Metadata::default(load_data.url)) start_sending(load_data.consumer, Metadata::default(load_data.url))
.send(ProgressMsg::Done(Err("no loader for scheme".to_string()))).unwrap(); .send(ProgressMsg::Done(Err("no loader for scheme".to_string()))).unwrap();
return return
} }
}; };
debug!("resource_task: loading url: {}", load_data.url.serialize()); debug!("resource_task: loading url: {}", load_data.url.serialize());
loader.invoke((load_data, self.sniffer_task.clone())); loader.invoke((load_data, self.mime_classifier.clone()));
} }
} }

View file

@ -1,44 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
//! A task that sniffs data
use std::sync::mpsc::{channel, Receiver, Sender};
use std::thread::Builder;
use resource_task::{TargetedLoadResponse};
pub type SnifferTask = Sender<TargetedLoadResponse>;
/// Spawns the "SnifferManager" thread and hands back the sending half of the
/// channel it listens on.
///
/// Panics if the thread cannot be spawned.
pub fn new_sniffer_task() -> SnifferTask {
    let (sender, receiver) = channel();
    Builder::new()
        .name("SnifferManager".to_string())
        .spawn(move || {
            SnifferManager::new(receiver).start();
        })
        .unwrap();
    sender
}
/// Relays load responses: owns the receiving end of the sniffer channel and
/// forwards every message it receives to the consumer named in that message.
struct SnifferManager {
    data_receiver: Receiver<TargetedLoadResponse>,
}

impl SnifferManager {
    /// Builds a manager that will read from `data_receiver`.
    fn new(data_receiver: Receiver<TargetedLoadResponse>) -> SnifferManager {
        SnifferManager {
            data_receiver: data_receiver,
        }
    }

    /// Runs the relay loop, consuming the manager.
    ///
    /// Each received message has its `load_response` sent on its own
    /// `consumer` channel. The loop ends as soon as receiving fails.
    fn start(self) {
        while let Ok(targeted) = self.data_receiver.recv() {
            // A failed send is deliberately ignored; the loop keeps serving
            // the remaining messages.
            let _ = targeted.consumer.send(targeted.load_response);
        }
    }
}

View file

@ -132,6 +132,9 @@ pub struct Opts {
/// A specific path to find required resources (such as user-agent.css). /// A specific path to find required resources (such as user-agent.css).
pub resources_path: Option<String>, pub resources_path: Option<String>,
/// Whether MIME sniffing should be used
pub sniff_mime_types: bool,
} }
fn print_usage(app: &str, opts: &[getopts::OptGroup]) { fn print_usage(app: &str, opts: &[getopts::OptGroup]) {
@ -212,6 +215,7 @@ pub fn default_opts() -> Opts {
validate_display_list_geometry: false, validate_display_list_geometry: false,
profile_tasks: false, profile_tasks: false,
resources_path: None, resources_path: None,
sniff_mime_types: false,
} }
} }
@ -244,6 +248,7 @@ pub fn from_cmdline_args(args: &[String]) -> bool {
getopts::optflag("h", "help", "Print this message"), getopts::optflag("h", "help", "Print this message"),
getopts::optopt("r", "render-api", "Set the rendering API to use", "gl|mesa"), getopts::optopt("r", "render-api", "Set the rendering API to use", "gl|mesa"),
getopts::optopt("", "resources-path", "Path to find static resources", "/home/servo/resources"), getopts::optopt("", "resources-path", "Path to find static resources", "/home/servo/resources"),
getopts::optflag("", "sniff-mime-types" , "Enable MIME sniffing"),
); );
let opt_match = match getopts::getopts(args, opts.as_slice()) { let opt_match = match getopts::getopts(args, opts.as_slice()) {
@ -371,6 +376,7 @@ pub fn from_cmdline_args(args: &[String]) -> bool {
relayout_event: debug_options.contains(&"relayout-event"), relayout_event: debug_options.contains(&"relayout-event"),
validate_display_list_geometry: debug_options.contains(&"validate-display-list-geometry"), validate_display_list_geometry: debug_options.contains(&"validate-display-list-geometry"),
resources_path: opt_match.opt_str("resources-path"), resources_path: opt_match.opt_str("resources-path"),
sniff_mime_types: opt_match.opt_present("sniff-mime-types"),
}; };
set_opts(opts); set_opts(opts);

View file

@ -0,0 +1 @@
wOFF

Binary file not shown.

View file

@ -0,0 +1,157 @@
%PDF-1.2
%âãÏÓ
9 0 obj
<<
/Length 10 0 R
/Filter /FlateDecode
>>
stream
H‰Í<EFBFBD>ÑJÃ0†Ÿ ïð{§²fç$M“ínÒ-<14><EFBFBD>[&jeŠâÛÛ¤ ñ~$ÉÉÿ}ÉÉ…¬Ij«¬ÌsÀ—Ç~€XÖ-],÷‚$Y—÷Ó)ü'N«u­1!œ„ÀVÙ?ŸÁ?
žb1RbbœÒ‰ÉH²[¹™TD:#ž&Ø­ÙÌX®¦øiç»$qnf¬ƒ¿¶]»ÀõËîãaÿ¶{ÿÂØ£‰×q|JªLs]™QÒI¸¬jî„%¯Œ9Øé`ß঺¼ÅU»ite<74>zÛ$›’Ú¿OeBÆÄÒ¯á¸Råþ@zÜ—úóÿgª¼ø<õ¡ª
endstream
endobj
10 0 obj
246
endobj
4 0 obj
<<
/Type /Page
/Parent 5 0 R
/Resources <<
/Font <<
/F0 6 0 R
/F1 7 0 R
>>
/ProcSet 2 0 R
>>
/Contents 9 0 R
>>
endobj
6 0 obj
<<
/Type /Font
/Subtype /TrueType
/Name /F0
/BaseFont /Arial
/Encoding /WinAnsiEncoding
>>
endobj
7 0 obj
<<
/Type /Font
/Subtype /TrueType
/Name /F1
/BaseFont /BookAntiqua,Bold
/FirstChar 31
/LastChar 255
/Widths [ 750 250 278 402 606 500 889 833 227 333 333 444 606 250 333 250
296 500 500 500 500 500 500 500 500 500 500 250 250 606 606 606
444 747 778 667 722 833 611 556 833 833 389 389 778 611 1000 833
833 611 833 722 611 667 778 778 1000 667 667 667 333 606 333 606
500 333 500 611 444 611 500 389 556 611 333 333 611 333 889 611
556 611 611 389 444 333 611 556 833 500 556 500 310 606 310 606
750 500 750 333 500 500 1000 500 500 333 1000 611 389 1000 750 750
750 750 278 278 500 500 606 500 1000 333 998 444 389 833 750 750
667 250 278 500 500 606 500 606 500 333 747 438 500 606 333 747
500 400 549 361 361 333 576 641 250 333 361 488 500 889 890 889
444 778 778 778 778 778 778 1000 722 611 611 611 611 389 389 389
389 833 833 833 833 833 833 833 606 833 778 778 778 778 667 611
611 500 500 500 500 500 500 778 444 500 500 500 500 333 333 333
333 556 611 556 556 556 556 556 549 556 611 611 611 611 556 611
556 ]
/Encoding /WinAnsiEncoding
/FontDescriptor 8 0 R
>>
endobj
8 0 obj
<<
/Type /FontDescriptor
/FontName /BookAntiqua,Bold
/Flags 16418
/FontBBox [ -250 -260 1236 930 ]
/MissingWidth 750
/StemV 146
/StemH 146
/ItalicAngle 0
/CapHeight 930
/XHeight 651
/Ascent 930
/Descent 260
/Leading 210
/MaxWidth 1030
/AvgWidth 460
>>
endobj
2 0 obj
[ /PDF /Text ]
endobj
5 0 obj
<<
/Kids [4 0 R ]
/Count 1
/Type /Pages
/MediaBox [ 0 0 612 792 ]
>>
endobj
1 0 obj
<<
/Creator (1725.fm)
/CreationDate (1-Jan-3 18:15PM)
/Title (1725.PDF)
/Author (Unknown)
/Producer (Acrobat PDFWriter 3.02 for Windows)
/Keywords ()
/Subject ()
>>
endobj
3 0 obj
<<
/Pages 5 0 R
/Type /Catalog
/DefaultGray 11 0 R
/DefaultRGB 12 0 R
>>
endobj
11 0 obj
[/CalGray
<<
/WhitePoint [0.9505 1 1.0891 ]
/Gamma 0.2468
>>
]
endobj
12 0 obj
[/CalRGB
<<
/WhitePoint [0.9505 1 1.0891 ]
/Gamma [0.2468 0.2468 0.2468 ]
/Matrix [0.4361 0.2225 0.0139 0.3851 0.7169 0.0971 0.1431 0.0606 0.7141 ]
>>
]
endobj
xref
0 13
0000000000 65535 f
0000002172 00000 n
0000002046 00000 n
0000002363 00000 n
0000000375 00000 n
0000002080 00000 n
0000000518 00000 n
0000000633 00000 n
0000001760 00000 n
0000000021 00000 n
0000000352 00000 n
0000002460 00000 n
0000002548 00000 n
trailer
<<
/Size 13
/Root 3 0 R
/Info 1 0 R
/ID [<47149510433dd4882f05f8c124223734><47149510433dd4882f05f8c124223734>]
>>
startxref
2726
%%EOF

View file

@ -0,0 +1 @@
%!PS-Adobe-

Binary file not shown.

View file

@ -0,0 +1 @@
PK

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

View file

@ -0,0 +1,3 @@
<A

View file

@ -0,0 +1,3 @@
<a

View file

@ -0,0 +1,3 @@
<A>

View file

@ -0,0 +1,3 @@
<a>

View file

@ -0,0 +1,3 @@
<B

View file

@ -0,0 +1,3 @@
<b

View file

@ -0,0 +1,3 @@
<B>

View file

@ -0,0 +1,3 @@
<b>

View file

@ -0,0 +1,3 @@
<BODY

View file

@ -0,0 +1,3 @@
<body

View file

@ -0,0 +1,3 @@
<BODY>

View file

@ -0,0 +1,3 @@
<body>

View file

@ -0,0 +1,3 @@
<BR

View file

@ -0,0 +1,3 @@
<br

View file

@ -0,0 +1,3 @@
<BR>

View file

@ -0,0 +1,3 @@
<br>

View file

@ -0,0 +1,3 @@
<!--

View file

@ -0,0 +1,3 @@
<!--

View file

@ -0,0 +1,3 @@
<!-->

View file

@ -0,0 +1,3 @@
<!-->

View file

@ -0,0 +1,3 @@
<DIV

View file

@ -0,0 +1,3 @@
<div

View file

@ -0,0 +1,3 @@
<DIV>

View file

@ -0,0 +1,3 @@
<div>

View file

@ -0,0 +1,3 @@
<!DOCTYPE HTML

View file

@ -0,0 +1,3 @@
<!doctype html

View file

@ -0,0 +1,4 @@
<!DOCTYPE HTML>

View file

@ -0,0 +1,4 @@
<!doctype html>

View file

@ -0,0 +1,3 @@
<FONT

View file

@ -0,0 +1,3 @@
<font

View file

@ -0,0 +1,3 @@
<FONT>

View file

@ -0,0 +1,3 @@
<font>

View file

@ -0,0 +1,3 @@
<H1

View file

@ -0,0 +1,3 @@
<h1

View file

@ -0,0 +1,3 @@
<H1>

View file

@ -0,0 +1,3 @@
<h1>

View file

@ -0,0 +1,3 @@
<HEAD

View file

@ -0,0 +1,3 @@
<head

View file

@ -0,0 +1,3 @@
<HEAD>

View file

@ -0,0 +1,3 @@
<head>

View file

@ -0,0 +1,3 @@
<IFRAME

View file

@ -0,0 +1,3 @@
<iframe

View file

@ -0,0 +1,3 @@
<IFRAME>

View file

@ -0,0 +1,3 @@
<iframe>

View file

@ -0,0 +1,3 @@
<P

View file

@ -0,0 +1,3 @@
<p

View file

@ -0,0 +1,3 @@
<P>

View file

@ -0,0 +1,3 @@
<p>

View file

@ -0,0 +1,3 @@
<HTML

View file

@ -0,0 +1,3 @@
<html

View file

@ -0,0 +1,3 @@
<HTML>

View file

@ -0,0 +1,3 @@
<html>

View file

@ -0,0 +1,3 @@
<SCRIPT

View file

@ -0,0 +1,3 @@
<script

View file

@ -0,0 +1,3 @@
<SCRIPT>

View file

@ -0,0 +1,3 @@
<script>

View file

@ -0,0 +1,3 @@
<STYLE

View file

@ -0,0 +1,3 @@
<style

View file

@ -0,0 +1,3 @@
<STYLE>

View file

@ -0,0 +1,3 @@
<style>

View file

@ -0,0 +1,3 @@
<TABLE

View file

@ -0,0 +1,3 @@
<table

View file

@ -0,0 +1,3 @@
<TABLE>

View file

@ -0,0 +1,3 @@
<table>

View file

@ -0,0 +1,3 @@
<TITLE

View file

@ -0,0 +1,3 @@
<title

View file

@ -0,0 +1,3 @@
<TITLE>

View file

@ -0,0 +1,3 @@
<title>

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show more