mirror of
https://github.com/servo/servo.git
synced 2025-08-03 04:30:10 +01:00
Issue #7393: Properly sniff mislabeled feeds.
This commit is contained in:
parent
67cbda4be3
commit
5301e59965
7 changed files with 52 additions and 6 deletions
|
@ -125,6 +125,10 @@ impl <'a, T: Iterator<Item=&'a u8> + Clone> Matches for T {
|
|||
// Side effects
|
||||
// moves the iterator when match is found
|
||||
fn matches(&mut self, matches: &[u8]) -> bool {
|
||||
if self.clone().nth(matches.len()).is_none() {
|
||||
// nth() is zero-based: there are fewer than matches.len() + 1 elements left in self
|
||||
return false
|
||||
}
|
||||
let result = self.clone().zip(matches).all(|(s, m)| *s == *m);
|
||||
if result {
|
||||
self.nth(matches.len());
|
||||
|
@ -381,9 +385,10 @@ where T: Iterator<Item=&'a u8> + Clone {
|
|||
|
||||
struct FeedsClassifier;
|
||||
impl FeedsClassifier {
|
||||
// Implements sniffing for mislabeled feeds (https://mimesniff.spec.whatwg.org/#sniffing-a-mislabeled-feed)
|
||||
fn classify_impl(&self, data: &[u8]) -> Option<(&'static str, &'static str)> {
|
||||
|
||||
// can not be feed unless length is > 3
|
||||
// Step 4: cannot be a feed unless length is >= 3
|
||||
if data.len() < 3 {
|
||||
return None;
|
||||
}
|
||||
|
@ -403,6 +408,7 @@ impl FeedsClassifier {
|
|||
return None;
|
||||
}
|
||||
|
||||
// Steps 5.2.1 to 5.2.4
|
||||
match eats_until(&mut matcher, b"?", b"?>")
|
||||
.chain(|| eats_until(&mut matcher, b"!--", b"-->"))
|
||||
.chain(|| eats_until(&mut matcher, b"!", b">")) {
|
||||
|
@ -411,20 +417,23 @@ impl FeedsClassifier {
|
|||
Match::Start => return None
|
||||
}
|
||||
|
||||
// Step 5.2.5
|
||||
if matcher.matches(b"rss") {
|
||||
return Some(("application", "rss+xml"));
|
||||
}
|
||||
// Step 5.2.6
|
||||
if matcher.matches(b"feed") {
|
||||
return Some(("application", "atom+xml"));
|
||||
}
|
||||
if matcher.matches(b"rdf: RDF") {
|
||||
// Step 5.2.7
|
||||
if matcher.matches(b"rdf:RDF") {
|
||||
while matcher.next().is_some() {
|
||||
match eats_until(&mut matcher,
|
||||
b"http: //purl.org/rss/1.0/",
|
||||
b"http: //www.w3.org/1999/02/22-rdf-syntax-ns#")
|
||||
b"http://purl.org/rss/1.0/",
|
||||
b"http://www.w3.org/1999/02/22-rdf-syntax-ns#")
|
||||
.chain(|| eats_until(&mut matcher,
|
||||
b"http: //www.w3.org/1999/02/22-rdf-syntax-ns#",
|
||||
b"http: //purl.org/rss/1.0/")) {
|
||||
b"http://www.w3.org/1999/02/22-rdf-syntax-ns#",
|
||||
b"http://purl.org/rss/1.0/")) {
|
||||
Match::StartAndEnd => return Some(("application", "rss+xml")),
|
||||
Match::DidNotMatch => {},
|
||||
Match::Start => return None
|
||||
|
|
|
@ -434,7 +434,14 @@ fn test_sniff_utf_8_bom() {
|
|||
|
||||
#[test]
|
||||
fn test_sniff_rss_feed() {
|
||||
// RSS feeds
|
||||
test_sniff_full(&PathBuf::from("text/xml/feed.rss"), "application", "rss+xml", Some(("text", "html")));
|
||||
test_sniff_full(&PathBuf::from("text/xml/rdf_rss.xml"), "application", "rss+xml", Some(("text", "html")));
|
||||
// Not RSS feeds
|
||||
test_sniff_full(&PathBuf::from("text/xml/rdf_rss_ko_1.xml"), "text", "html", Some(("text", "html")));
|
||||
test_sniff_full(&PathBuf::from("text/xml/rdf_rss_ko_2.xml"), "text", "html", Some(("text", "html")));
|
||||
test_sniff_full(&PathBuf::from("text/xml/rdf_rss_ko_3.xml"), "text", "html", Some(("text", "html")));
|
||||
test_sniff_full(&PathBuf::from("text/xml/rdf_rss_ko_4.xml"), "text", "html", Some(("text", "html")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
7
tests/unit/net/parsable_mime/text/xml/rdf_rss.xml
Normal file
7
tests/unit/net/parsable_mime/text/xml/rdf_rss.xml
Normal file
|
@ -0,0 +1,7 @@
|
|||
<!-- Good format for a "RDF feed" -->
|
||||
<?xml version="1.0"?>
|
||||
<rdf:RDF
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
>
|
||||
</rdf:RDF>
|
7
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_1.xml
Normal file
7
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_1.xml
Normal file
|
@ -0,0 +1,7 @@
|
|||
<!-- Bad format for a "RDF feed" (space between "rdf:" and "RDF") -->
|
||||
<?xml version="1.0"?>
|
||||
<rdf: RDF
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
>
|
||||
</rdf:RDF>
|
3
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_2.xml
Normal file
3
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_2.xml
Normal file
|
@ -0,0 +1,3 @@
|
|||
<!-- Bad format for a "RDF feed" (2 missing URLs) -->
|
||||
<?xml version="1.0"?>
|
||||
<rdf:RDF/>
|
6
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_3.xml
Normal file
6
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_3.xml
Normal file
|
@ -0,0 +1,6 @@
|
|||
<!-- Bad format for a "RDF feed" (one missing URL) -->
|
||||
<?xml version="1.0"?>
|
||||
<rdf:RDF
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
>
|
||||
</rdf:RDF>
|
7
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_4.xml
Normal file
7
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_4.xml
Normal file
|
@ -0,0 +1,7 @@
|
|||
<!-- Bad format for a "RDF feed" (unexpected space in first URL) -->
|
||||
<?xml version="1.0"?>
|
||||
<rdf:RDF
|
||||
xmlns:rdf="http: //www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
>
|
||||
</rdf:RDF>
|
Loading…
Add table
Add a link
Reference in a new issue