From 5301e59965143f8d3d14a292f87aa96a2634cac4 Mon Sep 17 00:00:00 2001 From: Simon Martin Date: Sun, 30 Aug 2015 00:15:23 +0200 Subject: [PATCH] Issue #7393: Properly sniff mislabeled feeds. --- components/net/mime_classifier.rs | 21 +++++++++++++------ tests/unit/net/mime_classifier.rs | 7 +++++++ .../net/parsable_mime/text/xml/rdf_rss.xml | 7 +++++++ .../parsable_mime/text/xml/rdf_rss_ko_1.xml | 7 +++++++ .../parsable_mime/text/xml/rdf_rss_ko_2.xml | 3 +++ .../parsable_mime/text/xml/rdf_rss_ko_3.xml | 6 ++++++ .../parsable_mime/text/xml/rdf_rss_ko_4.xml | 7 +++++++ 7 files changed, 52 insertions(+), 6 deletions(-) create mode 100644 tests/unit/net/parsable_mime/text/xml/rdf_rss.xml create mode 100644 tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_1.xml create mode 100644 tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_2.xml create mode 100644 tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_3.xml create mode 100644 tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_4.xml diff --git a/components/net/mime_classifier.rs b/components/net/mime_classifier.rs index 0ed472b3982..de1b445abf4 100644 --- a/components/net/mime_classifier.rs +++ b/components/net/mime_classifier.rs @@ -125,6 +125,10 @@ impl <'a, T: Iterator + Clone> Matches for T { // Side effects // moves the iterator when match is found fn matches(&mut self, matches: &[u8]) -> bool { + if self.clone().nth(matches.len()).is_none() { + // nth is zero-based: self has at most matches.len() elements left + return false + } let result = self.clone().zip(matches).all(|(s, m)| *s == *m); if result { self.nth(matches.len()); @@ -381,9 +385,10 @@ where T: Iterator + Clone { struct FeedsClassifier; impl FeedsClassifier { + // Implements sniffing for mislabeled feeds (https://mimesniff.spec.whatwg.org/#sniffing-a-mislabeled-feed) fn classify_impl(&self, data: &[u8]) -> Option<(&'static str, &'static str)> { - // can not be feed unless length is > 3 + // Step 4: cannot be a feed unless length is >= 3 if data.len() < 3 { return None; } 
@@ -403,6 +408,7 @@ impl FeedsClassifier { return None; } + // Steps 5.2.1 to 5.2.4 match eats_until(&mut matcher, b"?", b"?>") .chain(|| eats_until(&mut matcher, b"!--", b"-->")) .chain(|| eats_until(&mut matcher, b"!", b">")) { @@ -411,20 +417,23 @@ impl FeedsClassifier { Match::Start => return None } + // Step 5.2.5 if matcher.matches(b"rss") { return Some(("application", "rss+xml")); } + // Step 5.2.6 if matcher.matches(b"feed") { return Some(("application", "atom+xml")); } - if matcher.matches(b"rdf: RDF") { + // Step 5.2.7 + if matcher.matches(b"rdf:RDF") { while matcher.next().is_some() { match eats_until(&mut matcher, - b"http: //purl.org/rss/1.0/", - b"http: //www.w3.org/1999/02/22-rdf-syntax-ns#") + b"http://purl.org/rss/1.0/", + b"http://www.w3.org/1999/02/22-rdf-syntax-ns#") .chain(|| eats_until(&mut matcher, - b"http: //www.w3.org/1999/02/22-rdf-syntax-ns#", - b"http: //purl.org/rss/1.0/")) { + b"http://www.w3.org/1999/02/22-rdf-syntax-ns#", + b"http://purl.org/rss/1.0/")) { Match::StartAndEnd => return Some(("application", "rss+xml")), Match::DidNotMatch => {}, Match::Start => return None diff --git a/tests/unit/net/mime_classifier.rs b/tests/unit/net/mime_classifier.rs index b2e48926901..2c62878872e 100644 --- a/tests/unit/net/mime_classifier.rs +++ b/tests/unit/net/mime_classifier.rs @@ -434,7 +434,14 @@ fn test_sniff_utf_8_bom() { #[test] fn test_sniff_rss_feed() { + // RSS feeds test_sniff_full(&PathBuf::from("text/xml/feed.rss"), "application", "rss+xml", Some(("text", "html"))); + test_sniff_full(&PathBuf::from("text/xml/rdf_rss.xml"), "application", "rss+xml", Some(("text", "html"))); + // Not RSS feeds + test_sniff_full(&PathBuf::from("text/xml/rdf_rss_ko_1.xml"), "text", "html", Some(("text", "html"))); + test_sniff_full(&PathBuf::from("text/xml/rdf_rss_ko_2.xml"), "text", "html", Some(("text", "html"))); + test_sniff_full(&PathBuf::from("text/xml/rdf_rss_ko_3.xml"), "text", "html", Some(("text", "html"))); + 
test_sniff_full(&PathBuf::from("text/xml/rdf_rss_ko_4.xml"), "text", "html", Some(("text", "html"))); } #[test] diff --git a/tests/unit/net/parsable_mime/text/xml/rdf_rss.xml b/tests/unit/net/parsable_mime/text/xml/rdf_rss.xml new file mode 100644 index 00000000000..4c58f82974e --- /dev/null +++ b/tests/unit/net/parsable_mime/text/xml/rdf_rss.xml @@ -0,0 +1,7 @@ + + + + \ No newline at end of file diff --git a/tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_1.xml b/tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_1.xml new file mode 100644 index 00000000000..f6e486c5960 --- /dev/null +++ b/tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_1.xml @@ -0,0 +1,7 @@ + + + + \ No newline at end of file diff --git a/tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_2.xml b/tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_2.xml new file mode 100644 index 00000000000..be8414382e5 --- /dev/null +++ b/tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_2.xml @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_3.xml b/tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_3.xml new file mode 100644 index 00000000000..5f0f03f1e2d --- /dev/null +++ b/tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_3.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_4.xml b/tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_4.xml new file mode 100644 index 00000000000..c06a80cf1f8 --- /dev/null +++ b/tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_4.xml @@ -0,0 +1,7 @@ + + + + \ No newline at end of file