mirror of
https://github.com/servo/servo.git
synced 2025-08-05 05:30:08 +01:00
Issue #7393: Properly sniff mislabeled feeds.
This commit is contained in:
parent
67cbda4be3
commit
5301e59965
7 changed files with 52 additions and 6 deletions
|
@ -125,6 +125,10 @@ impl <'a, T: Iterator<Item=&'a u8> + Clone> Matches for T {
|
||||||
// Side effects
|
// Side effects
|
||||||
// moves the iterator when match is found
|
// moves the iterator when match is found
|
||||||
fn matches(&mut self, matches: &[u8]) -> bool {
|
fn matches(&mut self, matches: &[u8]) -> bool {
|
||||||
|
if self.clone().nth(matches.len()).is_none() {
|
||||||
|
// there are fewer than matches.len() elements in self
|
||||||
|
return false
|
||||||
|
}
|
||||||
let result = self.clone().zip(matches).all(|(s, m)| *s == *m);
|
let result = self.clone().zip(matches).all(|(s, m)| *s == *m);
|
||||||
if result {
|
if result {
|
||||||
self.nth(matches.len());
|
self.nth(matches.len());
|
||||||
|
@ -381,9 +385,10 @@ where T: Iterator<Item=&'a u8> + Clone {
|
||||||
|
|
||||||
struct FeedsClassifier;
|
struct FeedsClassifier;
|
||||||
impl FeedsClassifier {
|
impl FeedsClassifier {
|
||||||
|
// Implements sniffing for mislabeled feeds (https://mimesniff.spec.whatwg.org/#sniffing-a-mislabeled-feed)
|
||||||
fn classify_impl(&self, data: &[u8]) -> Option<(&'static str, &'static str)> {
|
fn classify_impl(&self, data: &[u8]) -> Option<(&'static str, &'static str)> {
|
||||||
|
|
||||||
// can not be feed unless length is > 3
|
// Step 4: cannot be a feed unless length is >= 3
|
||||||
if data.len() < 3 {
|
if data.len() < 3 {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
@ -403,6 +408,7 @@ impl FeedsClassifier {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Steps 5.2.1 to 5.2.4
|
||||||
match eats_until(&mut matcher, b"?", b"?>")
|
match eats_until(&mut matcher, b"?", b"?>")
|
||||||
.chain(|| eats_until(&mut matcher, b"!--", b"-->"))
|
.chain(|| eats_until(&mut matcher, b"!--", b"-->"))
|
||||||
.chain(|| eats_until(&mut matcher, b"!", b">")) {
|
.chain(|| eats_until(&mut matcher, b"!", b">")) {
|
||||||
|
@ -411,20 +417,23 @@ impl FeedsClassifier {
|
||||||
Match::Start => return None
|
Match::Start => return None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Step 5.2.5
|
||||||
if matcher.matches(b"rss") {
|
if matcher.matches(b"rss") {
|
||||||
return Some(("application", "rss+xml"));
|
return Some(("application", "rss+xml"));
|
||||||
}
|
}
|
||||||
|
// Step 5.2.6
|
||||||
if matcher.matches(b"feed") {
|
if matcher.matches(b"feed") {
|
||||||
return Some(("application", "atom+xml"));
|
return Some(("application", "atom+xml"));
|
||||||
}
|
}
|
||||||
if matcher.matches(b"rdf: RDF") {
|
// Step 5.2.7
|
||||||
|
if matcher.matches(b"rdf:RDF") {
|
||||||
while matcher.next().is_some() {
|
while matcher.next().is_some() {
|
||||||
match eats_until(&mut matcher,
|
match eats_until(&mut matcher,
|
||||||
b"http: //purl.org/rss/1.0/",
|
b"http://purl.org/rss/1.0/",
|
||||||
b"http: //www.w3.org/1999/02/22-rdf-syntax-ns#")
|
b"http://www.w3.org/1999/02/22-rdf-syntax-ns#")
|
||||||
.chain(|| eats_until(&mut matcher,
|
.chain(|| eats_until(&mut matcher,
|
||||||
b"http: //www.w3.org/1999/02/22-rdf-syntax-ns#",
|
b"http://www.w3.org/1999/02/22-rdf-syntax-ns#",
|
||||||
b"http: //purl.org/rss/1.0/")) {
|
b"http://purl.org/rss/1.0/")) {
|
||||||
Match::StartAndEnd => return Some(("application", "rss+xml")),
|
Match::StartAndEnd => return Some(("application", "rss+xml")),
|
||||||
Match::DidNotMatch => {},
|
Match::DidNotMatch => {},
|
||||||
Match::Start => return None
|
Match::Start => return None
|
||||||
|
|
|
@ -434,7 +434,14 @@ fn test_sniff_utf_8_bom() {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_sniff_rss_feed() {
|
fn test_sniff_rss_feed() {
|
||||||
|
// RSS feeds
|
||||||
test_sniff_full(&PathBuf::from("text/xml/feed.rss"), "application", "rss+xml", Some(("text", "html")));
|
test_sniff_full(&PathBuf::from("text/xml/feed.rss"), "application", "rss+xml", Some(("text", "html")));
|
||||||
|
test_sniff_full(&PathBuf::from("text/xml/rdf_rss.xml"), "application", "rss+xml", Some(("text", "html")));
|
||||||
|
// Not RSS feeds
|
||||||
|
test_sniff_full(&PathBuf::from("text/xml/rdf_rss_ko_1.xml"), "text", "html", Some(("text", "html")));
|
||||||
|
test_sniff_full(&PathBuf::from("text/xml/rdf_rss_ko_2.xml"), "text", "html", Some(("text", "html")));
|
||||||
|
test_sniff_full(&PathBuf::from("text/xml/rdf_rss_ko_3.xml"), "text", "html", Some(("text", "html")));
|
||||||
|
test_sniff_full(&PathBuf::from("text/xml/rdf_rss_ko_4.xml"), "text", "html", Some(("text", "html")));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
7
tests/unit/net/parsable_mime/text/xml/rdf_rss.xml
Normal file
7
tests/unit/net/parsable_mime/text/xml/rdf_rss.xml
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
<!-- Good format for a "RDF feed" -->
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<rdf:RDF
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns="http://purl.org/rss/1.0/"
|
||||||
|
>
|
||||||
|
</rdf:RDF>
|
7
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_1.xml
Normal file
7
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_1.xml
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
<!-- Bad format for a "RDF feed" (space between "rdf:" and "RDF") -->
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<rdf: RDF
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns="http://purl.org/rss/1.0/"
|
||||||
|
>
|
||||||
|
</rdf:RDF>
|
3
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_2.xml
Normal file
3
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_2.xml
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
<!-- Bad format for a "RDF feed" (2 missing URLs) -->
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<rdf:RDF/>
|
6
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_3.xml
Normal file
6
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_3.xml
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
<!-- Bad format for a "RDF feed" (one missing URL) -->
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<rdf:RDF
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
>
|
||||||
|
</rdf:RDF>
|
7
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_4.xml
Normal file
7
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_4.xml
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
<!-- Bad format for a "RDF feed" (unexpected space in first URL) -->
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<rdf:RDF
|
||||||
|
xmlns:rdf="http: //www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns="http://purl.org/rss/1.0/"
|
||||||
|
>
|
||||||
|
</rdf:RDF>
|
Loading…
Add table
Add a link
Reference in a new issue