MIMEClassifier::classify always succeeds; fix the type to reflect this.

This commit is contained in:
Eli Friedman 2015-10-13 18:03:40 -07:00
parent 9fca41c7a7
commit 1c4962288c
3 changed files with 26 additions and 29 deletions

View file

@ -38,10 +38,11 @@ impl MIMEClassifier {
no_sniff_flag: NoSniffFlag,
apache_bug_flag: ApacheBugFlag,
supplied_type: &Option<(String, String)>,
data: &[u8]) -> Option<(String, String)> {
data: &[u8]) -> (String, String) {
match *supplied_type {
None => self.sniff_unknown_type(no_sniff_flag, data),
Some((ref media_type, ref media_subtype)) => {
Some(ref supplied_type) => {
let &(ref media_type, ref media_subtype) = supplied_type;
if MIMEClassifier::is_explicit_unknown(media_type, media_subtype) {
self.sniff_unknown_type(no_sniff_flag, data)
} else {
@ -51,14 +52,12 @@ impl MIMEClassifier {
ApacheBugFlag::ON => self.sniff_text_or_data(data),
ApacheBugFlag::OFF => match MIMEClassifier::get_media_type(media_type,
media_subtype) {
Some(MediaType::Xml) => supplied_type.clone(),
Some(MediaType::Html) =>
//Implied in section 7.3, but flow is not clear
self.feeds_classifier.classify(data).or(supplied_type.clone()),
Some(MediaType::Xml) => None,
Some(MediaType::Html) => self.feeds_classifier.classify(data),
Some(MediaType::Image) => self.image_classifier.classify(data),
Some(MediaType::AudioVideo) => self.audio_video_classifier.classify(data),
None => None
}.or(supplied_type.clone())
}.unwrap_or(supplied_type.clone())
}
}
}
@ -79,8 +78,7 @@ impl MIMEClassifier {
}
//some sort of iterator over the classifiers might be better?
fn sniff_unknown_type(&self, no_sniff_flag: NoSniffFlag, data: &[u8]) ->
Option<(String, String)> {
fn sniff_unknown_type(&self, no_sniff_flag: NoSniffFlag, data: &[u8]) -> (String, String) {
let should_sniff_scriptable = no_sniff_flag == NoSniffFlag::OFF;
let sniffed = if should_sniff_scriptable {
self.scriptable_classifier.classify(data)
@ -93,10 +91,11 @@ impl MIMEClassifier {
.or_else(|| self.audio_video_classifier.classify(data))
.or_else(|| self.archive_classifier.classify(data))
.or_else(|| self.binary_or_plaintext.classify(data))
.expect("BinaryOrPlaintextClassifier always succeeds")
}
fn sniff_text_or_data(&self, data: &[u8]) -> Option<(String, String)> {
self.binary_or_plaintext.classify(data)
fn sniff_text_or_data(&self, data: &[u8]) -> (String, String) {
self.binary_or_plaintext.classify(data).expect("BinaryOrPlaintextClassifier always succeeds")
}
fn is_xml(tp: &str, sub_tp: &str) -> bool {

View file

@ -89,13 +89,13 @@ pub fn start_sending_sniffed_opt(start_chan: LoadConsumer, mut metadata: Metadat
metadata.content_type.map(|ContentType(Mime(toplevel, sublevel, _))| {
(format!("{}", toplevel), format!("{}", sublevel))
});
metadata.content_type = classifier.classify(no_sniff, check_for_apache_bug, &supplied_type,
&partial_body).map(|(toplevel, sublevel)| {
let mime_tp: TopLevel = toplevel.parse().unwrap();
let mime_sb: SubLevel = sublevel.parse().unwrap();
ContentType(Mime(mime_tp, mime_sb, vec!()))
});
let (toplevel, sublevel) = classifier.classify(no_sniff,
check_for_apache_bug,
&supplied_type,
&partial_body);
let mime_tp: TopLevel = toplevel.parse().unwrap();
let mime_sb: SubLevel = sublevel.parse().unwrap();
metadata.content_type = Some(ContentType(Mime(mime_tp, mime_sb, vec![])));
}
start_sending_opt(start_chan, metadata)

View file

@ -55,17 +55,15 @@ fn test_sniff_with_flags(filename_orig: &path::Path,
match read_result {
Ok(data) => {
match classifier.classify(no_sniff_flag, apache_bug_flag, &as_string_option(supplied_type), &data) {
Some((parsed_type, parsed_subtp)) => {
if (&parsed_type[..] != type_string) ||
(&parsed_subtp[..] != subtype_string) {
panic!("File {:?} parsed incorrectly should be {}/{}, parsed as {}/{}",
filename, type_string, subtype_string,
parsed_type, parsed_subtp);
}
}
None => panic!("No classification found for {:?} with supplied type {:?}",
filename, supplied_type),
let (parsed_type, parsed_subtp) = classifier.classify(no_sniff_flag,
apache_bug_flag,
&as_string_option(supplied_type),
&data);
if (&parsed_type[..] != type_string) ||
(&parsed_subtp[..] != subtype_string) {
panic!("File {:?} parsed incorrectly should be {}/{}, parsed as {}/{}",
filename, type_string, subtype_string,
parsed_type, parsed_subtp);
}
}
Err(e) => panic!("Couldn't read from file {:?} with error {}",