mirror of
https://github.com/servo/servo.git
synced 2025-08-06 06:00:15 +01:00
Rustfmt net crate
This commit is contained in:
parent
ba1ed11ced
commit
2481ad25f8
30 changed files with 4957 additions and 2870 deletions
|
@ -25,16 +25,17 @@ pub enum MediaType {
|
|||
|
||||
pub enum ApacheBugFlag {
|
||||
On,
|
||||
Off
|
||||
Off,
|
||||
}
|
||||
|
||||
impl ApacheBugFlag {
|
||||
/// <https://mimesniff.spec.whatwg.org/#supplied-mime-type-detection-algorithm>
|
||||
pub fn from_content_type(last_raw_content_type: &[u8]) -> ApacheBugFlag {
|
||||
if last_raw_content_type == b"text/plain"
|
||||
|| last_raw_content_type == b"text/plain; charset=ISO-8859-1"
|
||||
|| last_raw_content_type == b"text/plain; charset=iso-8859-1"
|
||||
|| last_raw_content_type == b"text/plain; charset=UTF-8" {
|
||||
if last_raw_content_type == b"text/plain" ||
|
||||
last_raw_content_type == b"text/plain; charset=ISO-8859-1" ||
|
||||
last_raw_content_type == b"text/plain; charset=iso-8859-1" ||
|
||||
last_raw_content_type == b"text/plain; charset=UTF-8"
|
||||
{
|
||||
ApacheBugFlag::On
|
||||
} else {
|
||||
ApacheBugFlag::Off
|
||||
|
@ -45,19 +46,22 @@ impl ApacheBugFlag {
|
|||
#[derive(PartialEq)]
|
||||
pub enum NoSniffFlag {
|
||||
On,
|
||||
Off
|
||||
Off,
|
||||
}
|
||||
|
||||
|
||||
impl MimeClassifier {
|
||||
//Performs MIME Type Sniffing Algorithm (sections 7 and 8)
|
||||
pub fn classify<'a>(&'a self,
|
||||
context: LoadContext,
|
||||
no_sniff_flag: NoSniffFlag,
|
||||
apache_bug_flag: ApacheBugFlag,
|
||||
supplied_type: &Option<Mime>,
|
||||
data: &'a [u8]) -> Mime {
|
||||
let supplied_type_or_octet_stream = supplied_type.clone().unwrap_or(mime::APPLICATION_OCTET_STREAM);
|
||||
pub fn classify<'a>(
|
||||
&'a self,
|
||||
context: LoadContext,
|
||||
no_sniff_flag: NoSniffFlag,
|
||||
apache_bug_flag: ApacheBugFlag,
|
||||
supplied_type: &Option<Mime>,
|
||||
data: &'a [u8],
|
||||
) -> Mime {
|
||||
let supplied_type_or_octet_stream = supplied_type
|
||||
.clone()
|
||||
.unwrap_or(mime::APPLICATION_OCTET_STREAM);
|
||||
match context {
|
||||
LoadContext::Browsing => match *supplied_type {
|
||||
None => self.sniff_unknown_type(no_sniff_flag, data),
|
||||
|
@ -69,30 +73,41 @@ impl MimeClassifier {
|
|||
NoSniffFlag::On => supplied_type.clone(),
|
||||
NoSniffFlag::Off => match apache_bug_flag {
|
||||
ApacheBugFlag::On => self.sniff_text_or_data(data),
|
||||
ApacheBugFlag::Off => match MimeClassifier::get_media_type(supplied_type) {
|
||||
Some(MediaType::Html) => self.feeds_classifier.classify(data),
|
||||
Some(MediaType::Image) => self.image_classifier.classify(data),
|
||||
Some(MediaType::AudioVideo) => self.audio_video_classifier.classify(data),
|
||||
Some(MediaType::Xml) | None => None,
|
||||
}.unwrap_or(supplied_type.clone())
|
||||
}
|
||||
ApacheBugFlag::Off => {
|
||||
match MimeClassifier::get_media_type(supplied_type) {
|
||||
Some(MediaType::Html) => {
|
||||
self.feeds_classifier.classify(data)
|
||||
},
|
||||
Some(MediaType::Image) => {
|
||||
self.image_classifier.classify(data)
|
||||
},
|
||||
Some(MediaType::AudioVideo) => {
|
||||
self.audio_video_classifier.classify(data)
|
||||
},
|
||||
Some(MediaType::Xml) | None => None,
|
||||
}
|
||||
.unwrap_or(supplied_type.clone())
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
LoadContext::Image => {
|
||||
// Section 8.2 Sniffing an image context
|
||||
match MimeClassifier::maybe_get_media_type(supplied_type) {
|
||||
Some(MediaType::Xml) => None,
|
||||
_ => self.image_classifier.classify(data),
|
||||
}.unwrap_or(supplied_type_or_octet_stream)
|
||||
}
|
||||
.unwrap_or(supplied_type_or_octet_stream)
|
||||
},
|
||||
LoadContext::AudioVideo => {
|
||||
// Section 8.3 Sniffing an image context
|
||||
match MimeClassifier::maybe_get_media_type(supplied_type) {
|
||||
Some(MediaType::Xml) => None,
|
||||
_ => self.audio_video_classifier.classify(data),
|
||||
}.unwrap_or(supplied_type_or_octet_stream)
|
||||
}
|
||||
.unwrap_or(supplied_type_or_octet_stream)
|
||||
},
|
||||
LoadContext::Plugin => {
|
||||
// 8.4 Sniffing in a plugin context
|
||||
|
@ -129,7 +144,8 @@ impl MimeClassifier {
|
|||
match MimeClassifier::maybe_get_media_type(supplied_type) {
|
||||
Some(MediaType::Xml) => None,
|
||||
_ => self.font_classifier.classify(data),
|
||||
}.unwrap_or(supplied_type_or_octet_stream)
|
||||
}
|
||||
.unwrap_or(supplied_type_or_octet_stream)
|
||||
},
|
||||
LoadContext::TextTrack => {
|
||||
// 8.8 Sniffing in a text track context
|
||||
|
@ -149,16 +165,16 @@ impl MimeClassifier {
|
|||
}
|
||||
|
||||
pub fn new() -> MimeClassifier {
|
||||
MimeClassifier {
|
||||
image_classifier: GroupedClassifier::image_classifer(),
|
||||
audio_video_classifier: GroupedClassifier::audio_video_classifier(),
|
||||
scriptable_classifier: GroupedClassifier::scriptable_classifier(),
|
||||
plaintext_classifier: GroupedClassifier::plaintext_classifier(),
|
||||
archive_classifier: GroupedClassifier::archive_classifier(),
|
||||
binary_or_plaintext: BinaryOrPlaintextClassifier,
|
||||
feeds_classifier: FeedsClassifier,
|
||||
font_classifier: GroupedClassifier::font_classifier()
|
||||
}
|
||||
MimeClassifier {
|
||||
image_classifier: GroupedClassifier::image_classifer(),
|
||||
audio_video_classifier: GroupedClassifier::audio_video_classifier(),
|
||||
scriptable_classifier: GroupedClassifier::scriptable_classifier(),
|
||||
plaintext_classifier: GroupedClassifier::plaintext_classifier(),
|
||||
archive_classifier: GroupedClassifier::archive_classifier(),
|
||||
binary_or_plaintext: BinaryOrPlaintextClassifier,
|
||||
feeds_classifier: FeedsClassifier,
|
||||
font_classifier: GroupedClassifier::font_classifier(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn validate(&self) -> Result<(), String> {
|
||||
|
@ -182,7 +198,8 @@ impl MimeClassifier {
|
|||
None
|
||||
};
|
||||
|
||||
sniffed.or_else(|| self.plaintext_classifier.classify(data))
|
||||
sniffed
|
||||
.or_else(|| self.plaintext_classifier.classify(data))
|
||||
.or_else(|| self.image_classifier.classify(data))
|
||||
.or_else(|| self.audio_video_classifier.classify(data))
|
||||
.or_else(|| self.archive_classifier.classify(data))
|
||||
|
@ -191,13 +208,15 @@ impl MimeClassifier {
|
|||
}
|
||||
|
||||
fn sniff_text_or_data<'a>(&'a self, data: &'a [u8]) -> Mime {
|
||||
self.binary_or_plaintext.classify(data).expect("BinaryOrPlaintextClassifier always succeeds")
|
||||
self.binary_or_plaintext
|
||||
.classify(data)
|
||||
.expect("BinaryOrPlaintextClassifier always succeeds")
|
||||
}
|
||||
|
||||
fn is_xml(mt: &Mime) -> bool {
|
||||
mt.suffix() == Some(mime::XML) ||
|
||||
(mt.type_() == mime::APPLICATION && mt.subtype() == mime::XML) ||
|
||||
(mt.type_() == mime::TEXT && mt.subtype() == mime::XML)
|
||||
(mt.type_() == mime::APPLICATION && mt.subtype() == mime::XML) ||
|
||||
(mt.type_() == mime::TEXT && mt.subtype() == mime::XML)
|
||||
}
|
||||
|
||||
fn is_html(mt: &Mime) -> bool {
|
||||
|
@ -210,21 +229,21 @@ impl MimeClassifier {
|
|||
|
||||
fn is_audio_video(mt: &Mime) -> bool {
|
||||
mt.type_() == mime::AUDIO ||
|
||||
mt.type_() == mime::VIDEO ||
|
||||
mt.type_() == mime::APPLICATION && mt.subtype() == mime::OGG
|
||||
mt.type_() == mime::VIDEO ||
|
||||
mt.type_() == mime::APPLICATION && mt.subtype() == mime::OGG
|
||||
}
|
||||
|
||||
fn is_explicit_unknown(mt: &Mime) -> bool {
|
||||
mt.type_().as_str() == "unknown" && mt.subtype().as_str() == "unknown" ||
|
||||
mt.type_() == mime::APPLICATION && mt.subtype().as_str() == "unknown" ||
|
||||
mt.type_() == mime::STAR && mt.subtype() == mime::STAR
|
||||
mt.type_() == mime::APPLICATION && mt.subtype().as_str() == "unknown" ||
|
||||
mt.type_() == mime::STAR && mt.subtype() == mime::STAR
|
||||
}
|
||||
|
||||
fn get_media_type(mime: &Mime) -> Option<MediaType> {
|
||||
if MimeClassifier::is_xml(&mime) {
|
||||
Some(MediaType::Xml)
|
||||
} else if MimeClassifier::is_html(&mime) {
|
||||
Some(MediaType::Html)
|
||||
Some(MediaType::Html)
|
||||
} else if MimeClassifier::is_image(&mime) {
|
||||
Some(MediaType::Image)
|
||||
} else if MimeClassifier::is_audio_video(&mime) {
|
||||
|
@ -235,9 +254,9 @@ impl MimeClassifier {
|
|||
}
|
||||
|
||||
fn maybe_get_media_type(supplied_type: &Option<Mime>) -> Option<MediaType> {
|
||||
supplied_type.as_ref().and_then(|ref mime| {
|
||||
MimeClassifier::get_media_type(mime)
|
||||
})
|
||||
supplied_type
|
||||
.as_ref()
|
||||
.and_then(|ref mime| MimeClassifier::get_media_type(mime))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -252,7 +271,7 @@ trait Matches {
|
|||
fn matches(&mut self, matches: &[u8]) -> bool;
|
||||
}
|
||||
|
||||
impl <'a, T: Iterator<Item=&'a u8> + Clone> Matches for T {
|
||||
impl<'a, T: Iterator<Item = &'a u8> + Clone> Matches for T {
|
||||
// Matching function that works on an iterator.
|
||||
// see if the next matches.len() bytes in data_iterator equal matches
|
||||
// move iterator and return true or just return false
|
||||
|
@ -270,7 +289,7 @@ impl <'a, T: Iterator<Item=&'a u8> + Clone> Matches for T {
|
|||
fn matches(&mut self, matches: &[u8]) -> bool {
|
||||
if self.clone().nth(matches.len()).is_none() {
|
||||
// there are less than matches.len() elements in self
|
||||
return false
|
||||
return false;
|
||||
}
|
||||
let result = self.clone().zip(matches).all(|(s, m)| *s == *m);
|
||||
if result {
|
||||
|
@ -294,64 +313,68 @@ impl ByteMatcher {
|
|||
} else if data == self.pattern {
|
||||
Some(self.pattern.len())
|
||||
} else {
|
||||
data[..data.len() - self.pattern.len() + 1].iter()
|
||||
data[..data.len() - self.pattern.len() + 1]
|
||||
.iter()
|
||||
.position(|x| !self.leading_ignore.contains(x))
|
||||
.and_then(|start|
|
||||
if data[start..].iter()
|
||||
.zip(self.pattern.iter()).zip(self.mask.iter())
|
||||
.all(|((&data, &pattern), &mask)| (data & mask) == pattern) {
|
||||
.and_then(|start| {
|
||||
if data[start..]
|
||||
.iter()
|
||||
.zip(self.pattern.iter())
|
||||
.zip(self.mask.iter())
|
||||
.all(|((&data, &pattern), &mask)| (data & mask) == pattern)
|
||||
{
|
||||
Some(start + self.pattern.len())
|
||||
} else {
|
||||
None
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl MIMEChecker for ByteMatcher {
|
||||
fn classify(&self, data: &[u8]) -> Option<Mime> {
|
||||
self.matches(data).map(|_| {
|
||||
self.content_type.clone()
|
||||
})
|
||||
self.matches(data).map(|_| self.content_type.clone())
|
||||
}
|
||||
|
||||
fn validate(&self) -> Result<(), String> {
|
||||
if self.pattern.len() == 0 {
|
||||
return Err(format!(
|
||||
"Zero length pattern for {:?}",
|
||||
self.content_type
|
||||
))
|
||||
return Err(format!("Zero length pattern for {:?}", self.content_type));
|
||||
}
|
||||
if self.pattern.len() != self.mask.len() {
|
||||
return Err(format!(
|
||||
"Unequal pattern and mask length for {:?}",
|
||||
self.content_type
|
||||
))
|
||||
));
|
||||
}
|
||||
if self.pattern.iter().zip(self.mask.iter()).any(
|
||||
|(&pattern, &mask)| pattern & mask != pattern
|
||||
) {
|
||||
if self
|
||||
.pattern
|
||||
.iter()
|
||||
.zip(self.mask.iter())
|
||||
.any(|(&pattern, &mask)| pattern & mask != pattern)
|
||||
{
|
||||
return Err(format!(
|
||||
"Pattern not pre-masked for {:?}",
|
||||
self.content_type
|
||||
))
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
struct TagTerminatedByteMatcher {
|
||||
matcher: ByteMatcher
|
||||
matcher: ByteMatcher,
|
||||
}
|
||||
|
||||
impl MIMEChecker for TagTerminatedByteMatcher {
|
||||
fn classify(&self, data: &[u8]) -> Option<Mime> {
|
||||
self.matcher.matches(data).and_then(|j|
|
||||
self.matcher.matches(data).and_then(|j| {
|
||||
if j < data.len() && (data[j] == b' ' || data[j] == b'>') {
|
||||
Some(self.matcher.content_type.clone())
|
||||
} else {
|
||||
None
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn validate(&self) -> Result<(), String> {
|
||||
|
@ -367,8 +390,10 @@ impl Mp4Matcher {
|
|||
return false;
|
||||
}
|
||||
|
||||
let box_size = ((data[0] as u32) << 24 | (data[1] as u32) << 16 |
|
||||
(data[2] as u32) << 8 | (data[3] as u32)) as usize;
|
||||
let box_size = ((data[0] as u32) << 24 |
|
||||
(data[1] as u32) << 16 |
|
||||
(data[2] as u32) << 8 |
|
||||
(data[3] as u32)) as usize;
|
||||
if (data.len() < box_size) || (box_size % 4 != 0) {
|
||||
return false;
|
||||
}
|
||||
|
@ -380,9 +405,10 @@ impl Mp4Matcher {
|
|||
|
||||
let mp4 = [0x6D, 0x70, 0x34];
|
||||
data[8..].starts_with(&mp4) ||
|
||||
data[16..box_size].chunks(4).any(|chunk| chunk.starts_with(&mp4))
|
||||
data[16..box_size]
|
||||
.chunks(4)
|
||||
.any(|chunk| chunk.starts_with(&mp4))
|
||||
}
|
||||
|
||||
}
|
||||
impl MIMEChecker for Mp4Matcher {
|
||||
fn classify(&self, data: &[u8]) -> Option<Mime> {
|
||||
|
@ -403,14 +429,16 @@ struct BinaryOrPlaintextClassifier;
|
|||
impl BinaryOrPlaintextClassifier {
|
||||
fn classify_impl(&self, data: &[u8]) -> Mime {
|
||||
if data.starts_with(&[0xFFu8, 0xFEu8]) ||
|
||||
data.starts_with(&[0xFEu8, 0xFFu8]) ||
|
||||
data.starts_with(&[0xEFu8, 0xBBu8, 0xBFu8])
|
||||
data.starts_with(&[0xFEu8, 0xFFu8]) ||
|
||||
data.starts_with(&[0xEFu8, 0xBBu8, 0xBFu8])
|
||||
{
|
||||
mime::TEXT_PLAIN
|
||||
} else if data.iter().any(|&x| x <= 0x08u8 ||
|
||||
x == 0x0Bu8 ||
|
||||
(x >= 0x0Eu8 && x <= 0x1Au8) ||
|
||||
(x >= 0x1Cu8 && x <= 0x1Fu8)) {
|
||||
} else if data.iter().any(|&x| {
|
||||
x <= 0x08u8 ||
|
||||
x == 0x0Bu8 ||
|
||||
(x >= 0x0Eu8 && x <= 0x1Au8) ||
|
||||
(x >= 0x1Cu8 && x <= 0x1Fu8)
|
||||
}) {
|
||||
mime::APPLICATION_OCTET_STREAM
|
||||
} else {
|
||||
mime::TEXT_PLAIN
|
||||
|
@ -425,7 +453,6 @@ impl MIMEChecker for BinaryOrPlaintextClassifier {
|
|||
fn validate(&self) -> Result<(), String> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
}
|
||||
struct GroupedClassifier {
|
||||
byte_matchers: Vec<Box<MIMEChecker + Send + Sync>>,
|
||||
|
@ -442,7 +469,7 @@ impl GroupedClassifier {
|
|||
Box::new(ByteMatcher::image_webp()),
|
||||
Box::new(ByteMatcher::image_png()),
|
||||
Box::new(ByteMatcher::image_jpeg()),
|
||||
]
|
||||
],
|
||||
}
|
||||
}
|
||||
fn audio_video_classifier() -> GroupedClassifier {
|
||||
|
@ -456,8 +483,8 @@ impl GroupedClassifier {
|
|||
Box::new(ByteMatcher::audio_midi()),
|
||||
Box::new(ByteMatcher::video_avi()),
|
||||
Box::new(ByteMatcher::audio_wave()),
|
||||
Box::new(Mp4Matcher)
|
||||
]
|
||||
Box::new(Mp4Matcher),
|
||||
],
|
||||
}
|
||||
}
|
||||
fn scriptable_classifier() -> GroupedClassifier {
|
||||
|
@ -481,8 +508,8 @@ impl GroupedClassifier {
|
|||
Box::new(ByteMatcher::text_html_p()),
|
||||
Box::new(ByteMatcher::text_html_comment()),
|
||||
Box::new(ByteMatcher::text_xml()),
|
||||
Box::new(ByteMatcher::application_pdf())
|
||||
]
|
||||
Box::new(ByteMatcher::application_pdf()),
|
||||
],
|
||||
}
|
||||
}
|
||||
fn plaintext_classifier() -> GroupedClassifier {
|
||||
|
@ -491,8 +518,8 @@ impl GroupedClassifier {
|
|||
Box::new(ByteMatcher::text_plain_utf_8_bom()),
|
||||
Box::new(ByteMatcher::text_plain_utf_16le_bom()),
|
||||
Box::new(ByteMatcher::text_plain_utf_16be_bom()),
|
||||
Box::new(ByteMatcher::application_postscript())
|
||||
]
|
||||
Box::new(ByteMatcher::application_postscript()),
|
||||
],
|
||||
}
|
||||
}
|
||||
fn archive_classifier() -> GroupedClassifier {
|
||||
|
@ -500,8 +527,8 @@ impl GroupedClassifier {
|
|||
byte_matchers: vec![
|
||||
Box::new(ByteMatcher::application_x_gzip()),
|
||||
Box::new(ByteMatcher::application_zip()),
|
||||
Box::new(ByteMatcher::application_x_rar_compressed())
|
||||
]
|
||||
Box::new(ByteMatcher::application_x_rar_compressed()),
|
||||
],
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -513,7 +540,7 @@ impl GroupedClassifier {
|
|||
Box::new(ByteMatcher::open_type()),
|
||||
Box::new(ByteMatcher::true_type()),
|
||||
Box::new(ByteMatcher::application_vnd_ms_font_object()),
|
||||
]
|
||||
],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -536,7 +563,7 @@ impl MIMEChecker for GroupedClassifier {
|
|||
enum Match {
|
||||
Start,
|
||||
DidNotMatch,
|
||||
StartAndEnd
|
||||
StartAndEnd,
|
||||
}
|
||||
|
||||
impl Match {
|
||||
|
@ -549,7 +576,9 @@ impl Match {
|
|||
}
|
||||
|
||||
fn eats_until<'a, T>(matcher: &mut T, start: &[u8], end: &[u8]) -> Match
|
||||
where T: Iterator<Item=&'a u8> + Clone {
|
||||
where
|
||||
T: Iterator<Item = &'a u8> + Clone,
|
||||
{
|
||||
if !matcher.matches(start) {
|
||||
Match::DidNotMatch
|
||||
} else if end.len() == 1 {
|
||||
|
@ -593,11 +622,12 @@ impl FeedsClassifier {
|
|||
|
||||
// Steps 5.2.1 to 5.2.4
|
||||
match eats_until(&mut matcher, b"?", b"?>")
|
||||
.chain(|| eats_until(&mut matcher, b"!--", b"-->"))
|
||||
.chain(|| eats_until(&mut matcher, b"!", b">")) {
|
||||
.chain(|| eats_until(&mut matcher, b"!--", b"-->"))
|
||||
.chain(|| eats_until(&mut matcher, b"!", b">"))
|
||||
{
|
||||
Match::StartAndEnd => continue,
|
||||
Match::DidNotMatch => {},
|
||||
Match::Start => return None
|
||||
Match::Start => return None,
|
||||
}
|
||||
|
||||
// Step 5.2.5
|
||||
|
@ -611,15 +641,21 @@ impl FeedsClassifier {
|
|||
// Step 5.2.7
|
||||
if matcher.matches(b"rdf:RDF") {
|
||||
while matcher.next().is_some() {
|
||||
match eats_until(&mut matcher,
|
||||
b"http://purl.org/rss/1.0/",
|
||||
b"http://www.w3.org/1999/02/22-rdf-syntax-ns#")
|
||||
.chain(|| eats_until(&mut matcher,
|
||||
b"http://www.w3.org/1999/02/22-rdf-syntax-ns#",
|
||||
b"http://purl.org/rss/1.0/")) {
|
||||
match eats_until(
|
||||
&mut matcher,
|
||||
b"http://purl.org/rss/1.0/",
|
||||
b"http://www.w3.org/1999/02/22-rdf-syntax-ns#",
|
||||
)
|
||||
.chain(|| {
|
||||
eats_until(
|
||||
&mut matcher,
|
||||
b"http://www.w3.org/1999/02/22-rdf-syntax-ns#",
|
||||
b"http://purl.org/rss/1.0/",
|
||||
)
|
||||
}) {
|
||||
Match::StartAndEnd => return Some("application/rss+xml".parse().unwrap()),
|
||||
Match::DidNotMatch => {},
|
||||
Match::Start => return None
|
||||
Match::Start => return None,
|
||||
}
|
||||
}
|
||||
return None;
|
||||
|
@ -630,7 +666,7 @@ impl FeedsClassifier {
|
|||
|
||||
impl MIMEChecker for FeedsClassifier {
|
||||
fn classify(&self, data: &[u8]) -> Option<Mime> {
|
||||
self.classify_impl(data)
|
||||
self.classify_impl(data)
|
||||
}
|
||||
|
||||
fn validate(&self) -> Result<(), String> {
|
||||
|
@ -647,7 +683,7 @@ impl ByteMatcher {
|
|||
pattern: b"\x00\x00\x01\x00",
|
||||
mask: b"\xFF\xFF\xFF\xFF",
|
||||
content_type: "image/x-icon".parse().unwrap(),
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
//A Windows Cursor signature.
|
||||
|
@ -656,7 +692,7 @@ impl ByteMatcher {
|
|||
pattern: b"\x00\x00\x02\x00",
|
||||
mask: b"\xFF\xFF\xFF\xFF",
|
||||
content_type: "image/x-icon".parse().unwrap(),
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
//The string "BM", a BMP signature.
|
||||
|
@ -665,7 +701,7 @@ impl ByteMatcher {
|
|||
pattern: b"BM",
|
||||
mask: b"\xFF\xFF",
|
||||
content_type: mime::IMAGE_BMP,
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
//The string "GIF89a", a GIF signature.
|
||||
|
@ -674,7 +710,7 @@ impl ByteMatcher {
|
|||
pattern: b"GIF89a",
|
||||
mask: b"\xFF\xFF\xFF\xFF\xFF\xFF",
|
||||
content_type: mime::IMAGE_GIF,
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
//The string "GIF87a", a GIF signature.
|
||||
|
@ -683,7 +719,7 @@ impl ByteMatcher {
|
|||
pattern: b"GIF87a",
|
||||
mask: b"\xFF\xFF\xFF\xFF\xFF\xFF",
|
||||
content_type: mime::IMAGE_GIF,
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
//The string "RIFF" followed by four bytes followed by the string "WEBPVP".
|
||||
|
@ -692,7 +728,7 @@ impl ByteMatcher {
|
|||
pattern: b"RIFF\x00\x00\x00\x00WEBPVP",
|
||||
mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF",
|
||||
content_type: "image/webp".parse().unwrap(),
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
//An error-checking byte followed by the string "PNG" followed by CR LF SUB LF, the PNG
|
||||
|
@ -702,7 +738,7 @@ impl ByteMatcher {
|
|||
pattern: b"\x89PNG\r\n\x1A\n",
|
||||
mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
|
||||
content_type: mime::IMAGE_PNG,
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
// The JPEG Start of Image marker followed by the indicator byte of another marker.
|
||||
|
@ -711,7 +747,7 @@ impl ByteMatcher {
|
|||
pattern: b"\xFF\xD8\xFF",
|
||||
mask: b"\xFF\xFF\xFF",
|
||||
content_type: mime::IMAGE_JPEG,
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
//The WebM signature. [TODO: Use more bytes?]
|
||||
|
@ -720,7 +756,7 @@ impl ByteMatcher {
|
|||
pattern: b"\x1A\x45\xDF\xA3",
|
||||
mask: b"\xFF\xFF\xFF\xFF",
|
||||
content_type: "video/webm".parse().unwrap(),
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
//The string ".snd", the basic audio signature.
|
||||
|
@ -729,16 +765,16 @@ impl ByteMatcher {
|
|||
pattern: b".snd",
|
||||
mask: b"\xFF\xFF\xFF\xFF",
|
||||
content_type: "audio/basic".parse().unwrap(),
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
//The string "FORM" followed by four bytes followed by the string "AIFF", the AIFF signature.
|
||||
fn audio_aiff() -> ByteMatcher {
|
||||
ByteMatcher {
|
||||
pattern: b"FORM\x00\x00\x00\x00AIFF",
|
||||
mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
|
||||
pattern: b"FORM\x00\x00\x00\x00AIFF",
|
||||
mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
|
||||
content_type: "audio/aiff".parse().unwrap(),
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
//The string "ID3", the ID3v2-tagged MP3 signature.
|
||||
|
@ -747,7 +783,7 @@ impl ByteMatcher {
|
|||
pattern: b"ID3",
|
||||
mask: b"\xFF\xFF\xFF",
|
||||
content_type: "audio/mpeg".parse().unwrap(),
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
//The string "OggS" followed by NUL, the Ogg container signature.
|
||||
|
@ -756,7 +792,7 @@ impl ByteMatcher {
|
|||
pattern: b"OggS\x00",
|
||||
mask: b"\xFF\xFF\xFF\xFF\xFF",
|
||||
content_type: "application/ogg".parse().unwrap(),
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
//The string "MThd" followed by four bytes representing the number 6 in 32 bits (big-endian),
|
||||
|
@ -766,7 +802,7 @@ impl ByteMatcher {
|
|||
pattern: b"MThd\x00\x00\x00\x06",
|
||||
mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
|
||||
content_type: "audio/midi".parse().unwrap(),
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
//The string "RIFF" followed by four bytes followed by the string "AVI ", the AVI signature.
|
||||
|
@ -775,7 +811,7 @@ impl ByteMatcher {
|
|||
pattern: b"RIFF\x00\x00\x00\x00AVI ",
|
||||
mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
|
||||
content_type: "video/avi".parse().unwrap(),
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
// The string "RIFF" followed by four bytes followed by the string "WAVE", the WAVE signature.
|
||||
|
@ -784,7 +820,7 @@ impl ByteMatcher {
|
|||
pattern: b"RIFF\x00\x00\x00\x00WAVE",
|
||||
mask: b"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
|
||||
content_type: "audio/wave".parse().unwrap(),
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
// doctype terminated with Tag terminating (TT) Byte
|
||||
|
@ -794,8 +830,8 @@ impl ByteMatcher {
|
|||
pattern: b"<!DOCTYPE HTML",
|
||||
mask: b"\xFF\xFF\xDF\xDF\xDF\xDF\xDF\xDF\xDF\xFF\xDF\xDF\xDF\xDF",
|
||||
content_type: mime::TEXT_HTML,
|
||||
leading_ignore: b"\t\n\x0C\r "
|
||||
}
|
||||
leading_ignore: b"\t\n\x0C\r ",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -806,20 +842,20 @@ impl ByteMatcher {
|
|||
pattern: b"<HTML",
|
||||
mask: b"\xFF\xDF\xDF\xDF\xDF",
|
||||
content_type: mime::TEXT_HTML,
|
||||
leading_ignore: b"\t\n\x0C\r "
|
||||
}
|
||||
leading_ignore: b"\t\n\x0C\r ",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// head terminated with Tag Terminating (TT) Byte
|
||||
fn text_html_head() -> TagTerminatedByteMatcher {
|
||||
TagTerminatedByteMatcher {
|
||||
TagTerminatedByteMatcher {
|
||||
matcher: ByteMatcher {
|
||||
pattern: b"<HEAD",
|
||||
mask: b"\xFF\xDF\xDF\xDF\xDF",
|
||||
content_type: mime::TEXT_HTML,
|
||||
leading_ignore: b"\t\n\x0C\r "
|
||||
}
|
||||
leading_ignore: b"\t\n\x0C\r ",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -830,8 +866,8 @@ impl ByteMatcher {
|
|||
pattern: b"<SCRIPT",
|
||||
mask: b"\xFF\xDF\xDF\xDF\xDF\xDF\xDF",
|
||||
content_type: mime::TEXT_HTML,
|
||||
leading_ignore: b"\t\n\x0C\r "
|
||||
}
|
||||
leading_ignore: b"\t\n\x0C\r ",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -842,8 +878,8 @@ impl ByteMatcher {
|
|||
pattern: b"<IFRAME",
|
||||
mask: b"\xFF\xDF\xDF\xDF\xDF\xDF\xDF",
|
||||
content_type: mime::TEXT_HTML,
|
||||
leading_ignore: b"\t\n\x0C\r "
|
||||
}
|
||||
leading_ignore: b"\t\n\x0C\r ",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -854,8 +890,8 @@ impl ByteMatcher {
|
|||
pattern: b"<H1",
|
||||
mask: b"\xFF\xDF\xFF",
|
||||
content_type: mime::TEXT_HTML,
|
||||
leading_ignore: b"\t\n\x0C\r "
|
||||
}
|
||||
leading_ignore: b"\t\n\x0C\r ",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -866,8 +902,8 @@ impl ByteMatcher {
|
|||
pattern: b"<DIV",
|
||||
mask: b"\xFF\xDF\xDF\xDF",
|
||||
content_type: mime::TEXT_HTML,
|
||||
leading_ignore: b"\t\n\x0C\r "
|
||||
}
|
||||
leading_ignore: b"\t\n\x0C\r ",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -878,8 +914,8 @@ impl ByteMatcher {
|
|||
pattern: b"<FONT",
|
||||
mask: b"\xFF\xDF\xDF\xDF\xDF",
|
||||
content_type: mime::TEXT_HTML,
|
||||
leading_ignore: b"\t\n\x0C\r "
|
||||
}
|
||||
leading_ignore: b"\t\n\x0C\r ",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -887,11 +923,11 @@ impl ByteMatcher {
|
|||
fn text_html_table() -> TagTerminatedByteMatcher {
|
||||
TagTerminatedByteMatcher {
|
||||
matcher: ByteMatcher {
|
||||
pattern: b"<TABLE",
|
||||
mask: b"\xFF\xDF\xDF\xDF\xDF\xDF",
|
||||
content_type: mime::TEXT_HTML,
|
||||
leading_ignore: b"\t\n\x0C\r "
|
||||
}
|
||||
pattern: b"<TABLE",
|
||||
mask: b"\xFF\xDF\xDF\xDF\xDF\xDF",
|
||||
content_type: mime::TEXT_HTML,
|
||||
leading_ignore: b"\t\n\x0C\r ",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -902,8 +938,8 @@ impl ByteMatcher {
|
|||
pattern: b"<A",
|
||||
mask: b"\xFF\xDF",
|
||||
content_type: mime::TEXT_HTML,
|
||||
leading_ignore: b"\t\n\x0C\r "
|
||||
}
|
||||
leading_ignore: b"\t\n\x0C\r ",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -914,8 +950,8 @@ impl ByteMatcher {
|
|||
pattern: b"<STYLE",
|
||||
mask: b"\xFF\xDF\xDF\xDF\xDF\xDF",
|
||||
content_type: mime::TEXT_HTML,
|
||||
leading_ignore: b"\t\n\x0C\r "
|
||||
}
|
||||
leading_ignore: b"\t\n\x0C\r ",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -926,8 +962,8 @@ impl ByteMatcher {
|
|||
pattern: b"<TITLE",
|
||||
mask: b"\xFF\xDF\xDF\xDF\xDF\xDF",
|
||||
content_type: mime::TEXT_HTML,
|
||||
leading_ignore: b"\t\n\x0C\r "
|
||||
}
|
||||
leading_ignore: b"\t\n\x0C\r ",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -938,8 +974,8 @@ impl ByteMatcher {
|
|||
pattern: b"<B",
|
||||
mask: b"\xFF\xDF",
|
||||
content_type: mime::TEXT_HTML,
|
||||
leading_ignore: b"\t\n\x0C\r "
|
||||
}
|
||||
leading_ignore: b"\t\n\x0C\r ",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -950,8 +986,8 @@ impl ByteMatcher {
|
|||
pattern: b"<BODY",
|
||||
mask: b"\xFF\xDF\xDF\xDF\xDF",
|
||||
content_type: mime::TEXT_HTML,
|
||||
leading_ignore: b"\t\n\x0C\r "
|
||||
}
|
||||
leading_ignore: b"\t\n\x0C\r ",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -962,8 +998,8 @@ impl ByteMatcher {
|
|||
pattern: b"<BR",
|
||||
mask: b"\xFF\xDF\xDF",
|
||||
content_type: mime::TEXT_HTML,
|
||||
leading_ignore: b"\t\n\x0C\r "
|
||||
}
|
||||
leading_ignore: b"\t\n\x0C\r ",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -974,8 +1010,8 @@ impl ByteMatcher {
|
|||
pattern: b"<P",
|
||||
mask: b"\xFF\xDF",
|
||||
content_type: mime::TEXT_HTML,
|
||||
leading_ignore: b"\t\n\x0C\r "
|
||||
}
|
||||
leading_ignore: b"\t\n\x0C\r ",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -986,8 +1022,8 @@ impl ByteMatcher {
|
|||
pattern: b"<!--",
|
||||
mask: b"\xFF\xFF\xFF\xFF",
|
||||
content_type: mime::TEXT_HTML,
|
||||
leading_ignore: b"\t\n\x0C\r "
|
||||
}
|
||||
leading_ignore: b"\t\n\x0C\r ",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -997,7 +1033,7 @@ impl ByteMatcher {
|
|||
pattern: b"<?xml",
|
||||
mask: b"\xFF\xFF\xFF\xFF\xFF",
|
||||
content_type: mime::TEXT_XML,
|
||||
leading_ignore: b"\t\n\x0C\r "
|
||||
leading_ignore: b"\t\n\x0C\r ",
|
||||
}
|
||||
}
|
||||
//The string "%PDF-", the PDF signature.
|
||||
|
@ -1006,7 +1042,7 @@ impl ByteMatcher {
|
|||
pattern: b"%PDF-",
|
||||
mask: b"\xFF\xFF\xFF\xFF\xFF",
|
||||
content_type: mime::APPLICATION_PDF,
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
//34 bytes followed by the string "LP", the Embedded OpenType signature.
|
||||
|
@ -1019,7 +1055,7 @@ impl ByteMatcher {
|
|||
\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
|
||||
\x00\x00\xFF\xFF",
|
||||
content_type: "application/vnd.ms-fontobject".parse().unwrap(),
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
//4 bytes representing the version number 1.0, a TrueType signature.
|
||||
|
@ -1028,7 +1064,7 @@ impl ByteMatcher {
|
|||
pattern: b"\x00\x01\x00\x00",
|
||||
mask: b"\xFF\xFF\xFF\xFF",
|
||||
content_type: "application/font-sfnt".parse().unwrap(),
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
//The string "OTTO", the OpenType signature.
|
||||
|
@ -1037,7 +1073,7 @@ impl ByteMatcher {
|
|||
pattern: b"OTTO",
|
||||
mask: b"\xFF\xFF\xFF\xFF",
|
||||
content_type: "application/font-sfnt".parse().unwrap(),
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
// The string "ttcf", the TrueType Collection signature.
|
||||
|
@ -1046,7 +1082,7 @@ impl ByteMatcher {
|
|||
pattern: b"ttcf",
|
||||
mask: b"\xFF\xFF\xFF\xFF",
|
||||
content_type: "application/font-sfnt".parse().unwrap(),
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
// The string "wOFF", the Web Open Font Format signature.
|
||||
|
@ -1055,7 +1091,7 @@ impl ByteMatcher {
|
|||
pattern: b"wOFF",
|
||||
mask: b"\xFF\xFF\xFF\xFF",
|
||||
content_type: "application/font-woff".parse().unwrap(),
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
//The GZIP archive signature.
|
||||
|
@ -1064,7 +1100,7 @@ impl ByteMatcher {
|
|||
pattern: b"\x1F\x8B\x08",
|
||||
mask: b"\xFF\xFF\xFF",
|
||||
content_type: "application/x-gzip".parse().unwrap(),
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
//The string "PK" followed by ETX EOT, the ZIP archive signature.
|
||||
|
@ -1073,7 +1109,7 @@ impl ByteMatcher {
|
|||
pattern: b"PK\x03\x04",
|
||||
mask: b"\xFF\xFF\xFF\xFF",
|
||||
content_type: "application/zip".parse().unwrap(),
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
//The string "Rar " followed by SUB BEL NUL, the RAR archive signature.
|
||||
|
@ -1082,16 +1118,16 @@ impl ByteMatcher {
|
|||
pattern: b"Rar \x1A\x07\x00",
|
||||
mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
|
||||
content_type: "application/x-rar-compressed".parse().unwrap(),
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
// The string "%!PS-Adobe-", the PostScript signature.
|
||||
fn application_postscript() -> ByteMatcher {
|
||||
ByteMatcher {
|
||||
pattern: b"%!PS-Adobe-",
|
||||
mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
|
||||
mask: b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
|
||||
content_type: "application/postscript".parse().unwrap(),
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
// UTF-16BE BOM
|
||||
|
@ -1100,7 +1136,7 @@ impl ByteMatcher {
|
|||
pattern: b"\xFE\xFF\x00\x00",
|
||||
mask: b"\xFF\xFF\x00\x00",
|
||||
content_type: mime::TEXT_PLAIN,
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
//UTF-16LE BOM
|
||||
|
@ -1109,7 +1145,7 @@ impl ByteMatcher {
|
|||
pattern: b"\xFF\xFE\x00\x00",
|
||||
mask: b"\xFF\xFF\x00\x00",
|
||||
content_type: mime::TEXT_PLAIN,
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
//UTF-8 BOM
|
||||
|
@ -1118,7 +1154,7 @@ impl ByteMatcher {
|
|||
pattern: b"\xEF\xBB\xBF\x00",
|
||||
mask: b"\xFF\xFF\xFF\x00",
|
||||
content_type: mime::TEXT_PLAIN,
|
||||
leading_ignore: &[]
|
||||
leading_ignore: &[],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue