Implement MIME sniffing.

This commit is contained in:
Nathan Climer 2014-11-14 08:45:32 -05:00 committed by Josh Matthews
parent a277036dd9
commit 44930b0fb0
105 changed files with 2021 additions and 2 deletions

View file

@ -96,7 +96,7 @@ fn assert_parse(url: &'static str,
use sniffer_task; use sniffer_task;
let (start_chan, start_port) = channel(); let (start_chan, start_port) = channel();
let sniffer_task = sniffer_task::new_sniffer_task(); let sniffer_task = sniffer_task::new_mock_sniffer_task();
load(LoadData::new(Url::parse(url).unwrap(), start_chan), sniffer_task); load(LoadData::new(Url::parse(url).unwrap(), start_chan), sniffer_task);
let response = start_port.recv().unwrap(); let response = start_port.recv().unwrap();

View file

@ -47,6 +47,7 @@ pub mod pub_domains;
pub mod resource_task; pub mod resource_task;
pub mod storage_task; pub mod storage_task;
mod sniffer_task; mod sniffer_task;
mod mime_classifier;
/// An implementation of the [Fetch spec](http://fetch.spec.whatwg.org/) /// An implementation of the [Fetch spec](http://fetch.spec.whatwg.org/)
pub mod fetch { pub mod fetch {

File diff suppressed because it is too large Load diff

View file

@ -5,7 +5,8 @@
//! A task that sniffs data //! A task that sniffs data
use std::sync::mpsc::{channel, Receiver, Sender}; use std::sync::mpsc::{channel, Receiver, Sender};
use std::thread::Builder; use std::thread::Builder;
use resource_task::{TargetedLoadResponse}; use mime_classifier::MIMEClassifier;
use resource_task::{TargetedLoadResponse, LoadResponse, ProgressMsg};
pub type SnifferTask = Sender<TargetedLoadResponse>; pub type SnifferTask = Sender<TargetedLoadResponse>;
@ -20,17 +21,89 @@ pub fn new_sniffer_task() -> SnifferTask {
struct SnifferManager { struct SnifferManager {
data_receiver: Receiver<TargetedLoadResponse>, data_receiver: Receiver<TargetedLoadResponse>,
mime_classifier: MIMEClassifier
} }
impl SnifferManager { impl SnifferManager {
fn new(data_receiver: Receiver <TargetedLoadResponse>) -> SnifferManager { fn new(data_receiver: Receiver <TargetedLoadResponse>) -> SnifferManager {
SnifferManager { SnifferManager {
data_receiver: data_receiver, data_receiver: data_receiver,
mime_classifier: MIMEClassifier::new()
} }
} }
} }
impl SnifferManager { impl SnifferManager {
/// Runs the sniffing loop, consuming the manager.
///
/// For each `TargetedLoadResponse` taken from `data_receiver`, every
/// `ProgressMsg::Payload` chunk is buffered until `ProgressMsg::Done` arrives.
/// If the load finished with `Ok`, the buffered bytes are handed to the MIME
/// classifier and the result replaces the response's `content_type`. The
/// response is then re-sent to the consumer over a fresh progress channel as
/// a single `Payload` (omitted when no bytes were buffered) followed by `Done`.
///
/// Panics if `recv` on the upstream progress port returns an error before
/// `Done` is seen, or if a `send` on the new progress channel fails.
///
/// NOTE(review): the `return` after a successful forward ends `start`
/// entirely, so at most one response is fully forwarded per call, whereas a
/// failed `consumer.send` only `break`s the inner `loop` and lets the `for`
/// move on to the next message. Confirm that this asymmetry is intended.
fn start(self) {
for mut snif_data in self.data_receiver.iter() {
// Read all the data
let mut resource_data = vec!();
loop {
// `unwrap` panics if the progress port errors before `Done` is received.
match snif_data.load_response.progress_port.recv().unwrap() {
ProgressMsg::Payload(data) => {
// Accumulate the chunk; nothing is forwarded until `Done`.
resource_data.push_all(data.as_slice());
}
ProgressMsg::Done(res) => {
// Fresh channel over which the buffered body is replayed to the consumer.
let (new_progress_chan, new_progress_port) = channel();
// TODO: should be calculated in the resource loader, from pull request #4094
let nosniff = false;
let check_for_apache_bug = false;
// We have all the data, go ahead and sniff it and replace the Content-Type
if res.is_ok() {
snif_data.load_response.metadata.content_type = self.mime_classifier.classify(
nosniff,check_for_apache_bug,&snif_data.load_response.metadata.content_type,
&resource_data
);
}
// Metadata is moved into the new response, paired with the new port.
let load_response = LoadResponse {
progress_port: new_progress_port,
metadata: snif_data.load_response.metadata,
};
// A failed send abandons this response: leave the inner loop only.
if snif_data.consumer.send(load_response).is_err() {
break;
}
// Replay the whole body as one payload; skip it when nothing was buffered.
if resource_data.len() > 0 {
new_progress_chan.send(ProgressMsg::Payload(resource_data)).unwrap();
}
// Pass the original completion result through unchanged.
new_progress_chan.send(ProgressMsg::Done(res)).unwrap();
// Exits `start` itself, not just the current iteration.
return;
}
}
}
} // end for
}
}
/// Test-only constructor for a sniffer task.
///
/// Creates a channel, spawns a task named "SnifferManager" that runs
/// `MockSnifferManager::start` on the receiving end, and returns the sending
/// end to the caller.
#[cfg(test)]
pub fn new_mock_sniffer_task() -> SnifferTask {
    let (sender, receiver) = channel();
    TaskBuilder::new().named("SnifferManager").spawn(move || {
        MockSnifferManager::new(receiver).start();
    });
    sender
}
/// Test-only counterpart of `SnifferManager` that holds just the receiving
/// end of the sniffer channel (it carries no MIME classifier).
#[cfg(test)]
struct MockSnifferManager {
// Incoming load responses sent through the `SnifferTask` sender.
data_receiver: Receiver<TargetedLoadResponse>,
}
#[cfg(test)]
impl MockSnifferManager {
    /// Wraps the given receiver in a `MockSnifferManager`.
    fn new(data_receiver: Receiver<TargetedLoadResponse>) -> MockSnifferManager {
        MockSnifferManager { data_receiver: data_receiver }
    }
}
#[cfg(test)]
impl MockSnifferManager {
fn start(self) { fn start(self) {
loop { loop {
match self.data_receiver.recv() { match self.data_receiver.recv() {

View file

@ -0,0 +1 @@
wOFF

Binary file not shown.

View file

@ -0,0 +1,157 @@
%PDF-1.2
%âãÏÓ
9 0 obj
<<
/Length 10 0 R
/Filter /FlateDecode
>>
stream
H‰Í<EFBFBD>ÑJÃ0†Ÿ ïð{§²fç$M“ínÒ-<14><EFBFBD>[&jeŠâÛÛ¤ ñ~$ÉÉÿ}ÉÉ…¬Ij«¬ÌsÀ—Ç~€XÖ-],÷‚$Y—÷Ó)ü'N«u­1!œ„ÀVÙ?ŸÁ?
žb1RbbœÒ‰ÉH²[¹™TD:#ž&Ø­ÙÌX®¦øiç»$qnf¬ƒ¿¶]»ÀõËîãaÿ¶{ÿÂØ£‰×q|JªLs]™QÒI¸¬jî„%¯Œ9Øé`ß঺¼ÅU»ite<74>zÛ$›’Ú¿OeBÆÄÒ¯á¸Råþ@zÜ—úóÿgª¼ø<õ¡ª
endstream
endobj
10 0 obj
246
endobj
4 0 obj
<<
/Type /Page
/Parent 5 0 R
/Resources <<
/Font <<
/F0 6 0 R
/F1 7 0 R
>>
/ProcSet 2 0 R
>>
/Contents 9 0 R
>>
endobj
6 0 obj
<<
/Type /Font
/Subtype /TrueType
/Name /F0
/BaseFont /Arial
/Encoding /WinAnsiEncoding
>>
endobj
7 0 obj
<<
/Type /Font
/Subtype /TrueType
/Name /F1
/BaseFont /BookAntiqua,Bold
/FirstChar 31
/LastChar 255
/Widths [ 750 250 278 402 606 500 889 833 227 333 333 444 606 250 333 250
296 500 500 500 500 500 500 500 500 500 500 250 250 606 606 606
444 747 778 667 722 833 611 556 833 833 389 389 778 611 1000 833
833 611 833 722 611 667 778 778 1000 667 667 667 333 606 333 606
500 333 500 611 444 611 500 389 556 611 333 333 611 333 889 611
556 611 611 389 444 333 611 556 833 500 556 500 310 606 310 606
750 500 750 333 500 500 1000 500 500 333 1000 611 389 1000 750 750
750 750 278 278 500 500 606 500 1000 333 998 444 389 833 750 750
667 250 278 500 500 606 500 606 500 333 747 438 500 606 333 747
500 400 549 361 361 333 576 641 250 333 361 488 500 889 890 889
444 778 778 778 778 778 778 1000 722 611 611 611 611 389 389 389
389 833 833 833 833 833 833 833 606 833 778 778 778 778 667 611
611 500 500 500 500 500 500 778 444 500 500 500 500 333 333 333
333 556 611 556 556 556 556 556 549 556 611 611 611 611 556 611
556 ]
/Encoding /WinAnsiEncoding
/FontDescriptor 8 0 R
>>
endobj
8 0 obj
<<
/Type /FontDescriptor
/FontName /BookAntiqua,Bold
/Flags 16418
/FontBBox [ -250 -260 1236 930 ]
/MissingWidth 750
/StemV 146
/StemH 146
/ItalicAngle 0
/CapHeight 930
/XHeight 651
/Ascent 930
/Descent 260
/Leading 210
/MaxWidth 1030
/AvgWidth 460
>>
endobj
2 0 obj
[ /PDF /Text ]
endobj
5 0 obj
<<
/Kids [4 0 R ]
/Count 1
/Type /Pages
/MediaBox [ 0 0 612 792 ]
>>
endobj
1 0 obj
<<
/Creator (1725.fm)
/CreationDate (1-Jan-3 18:15PM)
/Title (1725.PDF)
/Author (Unknown)
/Producer (Acrobat PDFWriter 3.02 for Windows)
/Keywords ()
/Subject ()
>>
endobj
3 0 obj
<<
/Pages 5 0 R
/Type /Catalog
/DefaultGray 11 0 R
/DefaultRGB 12 0 R
>>
endobj
11 0 obj
[/CalGray
<<
/WhitePoint [0.9505 1 1.0891 ]
/Gamma 0.2468
>>
]
endobj
12 0 obj
[/CalRGB
<<
/WhitePoint [0.9505 1 1.0891 ]
/Gamma [0.2468 0.2468 0.2468 ]
/Matrix [0.4361 0.2225 0.0139 0.3851 0.7169 0.0971 0.1431 0.0606 0.7141 ]
>>
]
endobj
xref
0 13
0000000000 65535 f
0000002172 00000 n
0000002046 00000 n
0000002363 00000 n
0000000375 00000 n
0000002080 00000 n
0000000518 00000 n
0000000633 00000 n
0000001760 00000 n
0000000021 00000 n
0000000352 00000 n
0000002460 00000 n
0000002548 00000 n
trailer
<<
/Size 13
/Root 3 0 R
/Info 1 0 R
/ID [<47149510433dd4882f05f8c124223734><47149510433dd4882f05f8c124223734>]
>>
startxref
2726
%%EOF

View file

@ -0,0 +1 @@
%!PS-Adobe-

Binary file not shown.

View file

@ -0,0 +1 @@
PK

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

View file

@ -0,0 +1,3 @@
<A

View file

@ -0,0 +1,3 @@
<a

View file

@ -0,0 +1,3 @@
<A>

View file

@ -0,0 +1,3 @@
<a>

View file

@ -0,0 +1,3 @@
<B

View file

@ -0,0 +1,3 @@
<b

View file

@ -0,0 +1,3 @@
<B>

View file

@ -0,0 +1,3 @@
<b>

View file

@ -0,0 +1,3 @@
<BODY

View file

@ -0,0 +1,3 @@
<body

View file

@ -0,0 +1,3 @@
<BODY>

View file

@ -0,0 +1,3 @@
<body>

View file

@ -0,0 +1,3 @@
<BR

View file

@ -0,0 +1,3 @@
<br

View file

@ -0,0 +1,3 @@
<BR>

View file

@ -0,0 +1,3 @@
<br>

View file

@ -0,0 +1,3 @@
<!--

View file

@ -0,0 +1,3 @@
<!--

View file

@ -0,0 +1,3 @@
<!-->

View file

@ -0,0 +1,3 @@
<!-->

View file

@ -0,0 +1,3 @@
<DIV

View file

@ -0,0 +1,3 @@
<div

View file

@ -0,0 +1,3 @@
<DIV>

View file

@ -0,0 +1,3 @@
<div>

View file

@ -0,0 +1,3 @@
<!DOCTYPE HTML

View file

@ -0,0 +1,3 @@
<!doctype html

View file

@ -0,0 +1,4 @@
<!DOCTYPE HTML>

View file

@ -0,0 +1,4 @@
<!doctype html>

View file

@ -0,0 +1,3 @@
<FONT

View file

@ -0,0 +1,3 @@
<font

View file

@ -0,0 +1,3 @@
<FONT>

View file

@ -0,0 +1,3 @@
<font>

View file

@ -0,0 +1,3 @@
<H1

View file

@ -0,0 +1,3 @@
<h1

View file

@ -0,0 +1,3 @@
<H1>

View file

@ -0,0 +1,3 @@
<h1>

View file

@ -0,0 +1,3 @@
<HEAD

View file

@ -0,0 +1,3 @@
<head

View file

@ -0,0 +1,3 @@
<HEAD>

View file

@ -0,0 +1,3 @@
<head>

View file

@ -0,0 +1,3 @@
<IFRAME

View file

@ -0,0 +1,3 @@
<iframe

View file

@ -0,0 +1,3 @@
<IFRAME>

View file

@ -0,0 +1,3 @@
<iframe>

View file

@ -0,0 +1,3 @@
<P

View file

@ -0,0 +1,3 @@
<p

View file

@ -0,0 +1,3 @@
<P>

View file

@ -0,0 +1,3 @@
<p>

View file

@ -0,0 +1,3 @@
<HTML

View file

@ -0,0 +1,3 @@
<html

View file

@ -0,0 +1,3 @@
<HTML>

View file

@ -0,0 +1,3 @@
<html>

View file

@ -0,0 +1,3 @@
<SCRIPT

View file

@ -0,0 +1,3 @@
<script

View file

@ -0,0 +1,3 @@
<SCRIPT>

View file

@ -0,0 +1,3 @@
<script>

View file

@ -0,0 +1,3 @@
<STYLE

View file

@ -0,0 +1,3 @@
<style

View file

@ -0,0 +1,3 @@
<STYLE>

View file

@ -0,0 +1,3 @@
<style>

View file

@ -0,0 +1,3 @@
<TABLE

View file

@ -0,0 +1,3 @@
<table

View file

@ -0,0 +1,3 @@
<TABLE>

View file

@ -0,0 +1,3 @@
<table>

View file

@ -0,0 +1,3 @@
<TITLE

View file

@ -0,0 +1,3 @@
<title

View file

@ -0,0 +1,3 @@
<TITLE>

View file

@ -0,0 +1,3 @@
<title>

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1 @@
 test_file

View file

@ -0,0 +1 @@
<feed>

View file

@ -0,0 +1,151 @@
<?xml version="1.0" encoding="windows-1252"?>
<rss version="2.0">
<channel>
<title>FeedForAll Sample Feed</title>
<description>RSS is a fascinating technology. The uses for RSS are expanding daily. Take a closer look at how various industries are using the benefits of RSS in their businesses.</description>
<link>http://www.feedforall.com/industry-solutions.htm</link>
<category domain="www.dmoz.com">Computers/Software/Internet/Site Management/Content Management</category>
<copyright>Copyright 2004 NotePage, Inc.</copyright>
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
<language>en-us</language>
<lastBuildDate>Tue, 19 Oct 2004 13:39:14 -0400</lastBuildDate>
<managingEditor>marketing@feedforall.com</managingEditor>
<pubDate>Tue, 19 Oct 2004 13:38:55 -0400</pubDate>
<webMaster>webmaster@feedforall.com</webMaster>
<generator>FeedForAll Beta1 (0.0.1.8)</generator>
<image>
<url>http://www.feedforall.com/ffalogo48x48.gif</url>
<title>FeedForAll Sample Feed</title>
<link>http://www.feedforall.com/industry-solutions.htm</link>
<description>FeedForAll Sample Feed</description>
<width>48</width>
<height>48</height>
</image>
<item>
<title>RSS Solutions for Restaurants</title>
<description>&lt;b&gt;FeedForAll &lt;/b&gt;helps Restaurant&apos;s communicate with customers. Let your customers know the latest specials or events.&lt;br&gt;
&lt;br&gt;
RSS feed uses include:&lt;br&gt;
&lt;i&gt;&lt;font color=&quot;#FF0000&quot;&gt;Daily Specials &lt;br&gt;
Entertainment &lt;br&gt;
Calendar of Events &lt;/i&gt;&lt;/font&gt;</description>
<link>http://www.feedforall.com/restaurant.htm</link>
<category domain="www.dmoz.com">Computers/Software/Internet/Site Management/Content Management</category>
<comments>http://www.feedforall.com/forum</comments>
<pubDate>Tue, 19 Oct 2004 11:09:11 -0400</pubDate>
</item>
<item>
<title>RSS Solutions for Schools and Colleges</title>
<description>FeedForAll helps Educational Institutions communicate with students about school wide activities, events, and schedules.&lt;br&gt;
&lt;br&gt;
RSS feed uses include:&lt;br&gt;
&lt;i&gt;&lt;font color=&quot;#0000FF&quot;&gt;Homework Assignments &lt;br&gt;
School Cancellations &lt;br&gt;
Calendar of Events &lt;br&gt;
Sports Scores &lt;br&gt;
Clubs/Organization Meetings &lt;br&gt;
Lunches Menus &lt;/i&gt;&lt;/font&gt;</description>
<link>http://www.feedforall.com/schools.htm</link>
<category domain="www.dmoz.com">Computers/Software/Internet/Site Management/Content Management</category>
<comments>http://www.feedforall.com/forum</comments>
<pubDate>Tue, 19 Oct 2004 11:09:09 -0400</pubDate>
</item>
<item>
<title>RSS Solutions for Computer Service Companies</title>
<description>FeedForAll helps Computer Service Companies communicate with clients about cyber security and related issues. &lt;br&gt;
&lt;br&gt;
Uses include:&lt;br&gt;
&lt;i&gt;&lt;font color=&quot;#0000FF&quot;&gt;Cyber Security Alerts &lt;br&gt;
Specials&lt;br&gt;
Job Postings &lt;/i&gt;&lt;/font&gt;</description>
<link>http://www.feedforall.com/computer-service.htm</link>
<category domain="www.dmoz.com">Computers/Software/Internet/Site Management/Content Management</category>
<comments>http://www.feedforall.com/forum</comments>
<pubDate>Tue, 19 Oct 2004 11:09:07 -0400</pubDate>
</item>
<item>
<title>RSS Solutions for Governments</title>
<description>FeedForAll helps Governments communicate with the general public about positions on various issues, and keep the community aware of changes in important legislative issues. &lt;b&gt;&lt;i&gt;&lt;br&gt;
&lt;/b&gt;&lt;/i&gt;&lt;br&gt;
RSS uses Include:&lt;br&gt;
&lt;i&gt;&lt;font color=&quot;#00FF00&quot;&gt;Legislative Calendar&lt;br&gt;
Votes&lt;br&gt;
Bulletins&lt;/i&gt;&lt;/font&gt;</description>
<link>http://www.feedforall.com/government.htm</link>
<category domain="www.dmoz.com">Computers/Software/Internet/Site Management/Content Management</category>
<comments>http://www.feedforall.com/forum</comments>
<pubDate>Tue, 19 Oct 2004 11:09:05 -0400</pubDate>
</item>
<item>
<title>RSS Solutions for Politicians</title>
<description>FeedForAll helps Politicians communicate with the general public about positions on various issues, and keep the community notified of their schedule. &lt;br&gt;
&lt;br&gt;
Uses Include:&lt;br&gt;
&lt;i&gt;&lt;font color=&quot;#FF0000&quot;&gt;Blogs&lt;br&gt;
Speaking Engagements &lt;br&gt;
Statements&lt;br&gt;
&lt;/i&gt;&lt;/font&gt;</description>
<link>http://www.feedforall.com/politics.htm</link>
<category domain="www.dmoz.com">Computers/Software/Internet/Site Management/Content Management</category>
<comments>http://www.feedforall.com/forum</comments>
<pubDate>Tue, 19 Oct 2004 11:09:03 -0400</pubDate>
</item>
<item>
<title>RSS Solutions for Meteorologists</title>
<description>FeedForAll helps Meteorologists communicate with the general public about storm warnings and weather alerts, in specific regions. Using RSS meteorologists are able to quickly disseminate urgent and life threatening weather warnings. &lt;br&gt;
&lt;br&gt;
Uses Include:&lt;br&gt;
&lt;i&gt;&lt;font color=&quot;#0000FF&quot;&gt;Weather Alerts&lt;br&gt;
Plotting Storms&lt;br&gt;
School Cancellations &lt;/i&gt;&lt;/font&gt;</description>
<link>http://www.feedforall.com/weather.htm</link>
<category domain="www.dmoz.com">Computers/Software/Internet/Site Management/Content Management</category>
<comments>http://www.feedforall.com/forum</comments>
<pubDate>Tue, 19 Oct 2004 11:09:01 -0400</pubDate>
</item>
<item>
<title>RSS Solutions for Realtors &amp; Real Estate Firms</title>
<description>FeedForAll helps Realtors and Real Estate companies communicate with clients informing them of newly available properties, and open house announcements. RSS helps to reach a targeted audience and spread the word in an inexpensive, professional manner. &lt;font color=&quot;#0000FF&quot;&gt;&lt;br&gt;
&lt;/font&gt;&lt;br&gt;
Feeds can be used for:&lt;br&gt;
&lt;i&gt;&lt;font color=&quot;#FF0000&quot;&gt;Open House Dates&lt;br&gt;
New Properties For Sale&lt;br&gt;
Mortgage Rates&lt;/i&gt;&lt;/font&gt;</description>
<link>http://www.feedforall.com/real-estate.htm</link>
<category domain="www.dmoz.com">Computers/Software/Internet/Site Management/Content Management</category>
<comments>http://www.feedforall.com/forum</comments>
<pubDate>Tue, 19 Oct 2004 11:08:59 -0400</pubDate>
</item>
<item>
<title>RSS Solutions for Banks / Mortgage Companies</title>
<description>FeedForAll helps &lt;b&gt;Banks, Credit Unions and Mortgage companies&lt;/b&gt; communicate with the general public about rate changes in a prompt and professional manner. &lt;br&gt;
&lt;br&gt;
Uses include:&lt;br&gt;
&lt;i&gt;&lt;font color=&quot;#0000FF&quot;&gt;Mortgage Rates&lt;br&gt;
Foreign Exchange Rates &lt;br&gt;
Bank Rates&lt;br&gt;
Specials&lt;/i&gt;&lt;/font&gt;</description>
<link>http://www.feedforall.com/banks.htm</link>
<category domain="www.dmoz.com">Computers/Software/Internet/Site Management/Content Management</category>
<comments>http://www.feedforall.com/forum</comments>
<pubDate>Tue, 19 Oct 2004 11:08:57 -0400</pubDate>
</item>
<item>
<title>RSS Solutions for Law Enforcement</title>
<description>&lt;b&gt;FeedForAll&lt;/b&gt; helps Law Enforcement Professionals communicate with the general public and other agencies in a prompt and efficient manner. Using RSS police are able to quickly disseminate urgent and life threatening information. &lt;br&gt;
&lt;br&gt;
Uses include:&lt;br&gt;
&lt;i&gt;&lt;font color=&quot;#0000FF&quot;&gt;Amber Alerts&lt;br&gt;
Sex Offender Community Notification &lt;br&gt;
Weather Alerts &lt;br&gt;
Scheduling &lt;br&gt;
Security Alerts &lt;br&gt;
Police Report &lt;br&gt;
Meetings&lt;/i&gt;&lt;/font&gt;</description>
<link>http://www.feedforall.com/law-enforcement.htm</link>
<category domain="www.dmoz.com">Computers/Software/Internet/Site Management/Content Management</category>
<comments>http://www.feedforall.com/forum</comments>
<pubDate>Tue, 19 Oct 2004 11:08:56 -0400</pubDate>
</item>
</channel>
</rss>

View file

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<foo>
<bar>
</bar>
</foo>

View file

@ -0,0 +1 @@
OTTO

Some files were not shown because too many files have changed in this diff Show more