mirror of
https://github.com/servo/servo.git
synced 2025-07-23 07:13:52 +01:00
Auto merge of #7449 - simartin:issue_7393, r=metajack
Issue #7393: Properly sniff mislabeled feeds. Hi, This patch is an attempt to fix https://github.com/servo/servo/issues/7393, where the code detecting mislabeled feeds (see https://mimesniff.spec.whatwg.org/#sniffing-a-mislabeled-feed) had spurious spaces in the URLs we need to match. Note that my testing (in particular rdf_rss_ko_2.xml) highlighted a flaw in "matches", which failed to check that there were more bytes left in the input being checked than in the pattern we're checking against; this completely broke the whole of step 5.2.7. Thanks in advance for your review. Cheers, Simon <!-- Reviewable:start --> [<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/servo/servo/7449) <!-- Reviewable:end -->
This commit is contained in:
commit
9f85370885
7 changed files with 52 additions and 6 deletions
|
@ -125,6 +125,10 @@ impl <'a, T: Iterator<Item=&'a u8> + Clone> Matches for T {
|
|||
// Side effects
|
||||
// moves the iterator when match is found
|
||||
fn matches(&mut self, matches: &[u8]) -> bool {
|
||||
if self.clone().nth(matches.len()).is_none() {
|
||||
// self has at most matches.len() elements left (nth is zero-based), so it cannot match
|
||||
return false
|
||||
}
|
||||
let result = self.clone().zip(matches).all(|(s, m)| *s == *m);
|
||||
if result {
|
||||
self.nth(matches.len());
|
||||
|
@ -381,9 +385,10 @@ where T: Iterator<Item=&'a u8> + Clone {
|
|||
|
||||
struct FeedsClassifier;
|
||||
impl FeedsClassifier {
|
||||
// Implements sniffing for mislabeled feeds (https://mimesniff.spec.whatwg.org/#sniffing-a-mislabeled-feed)
|
||||
fn classify_impl(&self, data: &[u8]) -> Option<(&'static str, &'static str)> {
|
||||
|
||||
// can not be feed unless length is > 3
|
||||
// Step 4: cannot be a feed unless length is at least 3
|
||||
if data.len() < 3 {
|
||||
return None;
|
||||
}
|
||||
|
@ -403,6 +408,7 @@ impl FeedsClassifier {
|
|||
return None;
|
||||
}
|
||||
|
||||
// Steps 5.2.1 to 5.2.4
|
||||
match eats_until(&mut matcher, b"?", b"?>")
|
||||
.chain(|| eats_until(&mut matcher, b"!--", b"-->"))
|
||||
.chain(|| eats_until(&mut matcher, b"!", b">")) {
|
||||
|
@ -411,20 +417,23 @@ impl FeedsClassifier {
|
|||
Match::Start => return None
|
||||
}
|
||||
|
||||
// Step 5.2.5
|
||||
if matcher.matches(b"rss") {
|
||||
return Some(("application", "rss+xml"));
|
||||
}
|
||||
// Step 5.2.6
|
||||
if matcher.matches(b"feed") {
|
||||
return Some(("application", "atom+xml"));
|
||||
}
|
||||
if matcher.matches(b"rdf: RDF") {
|
||||
// Step 5.2.7
|
||||
if matcher.matches(b"rdf:RDF") {
|
||||
while matcher.next().is_some() {
|
||||
match eats_until(&mut matcher,
|
||||
b"http: //purl.org/rss/1.0/",
|
||||
b"http: //www.w3.org/1999/02/22-rdf-syntax-ns#")
|
||||
b"http://purl.org/rss/1.0/",
|
||||
b"http://www.w3.org/1999/02/22-rdf-syntax-ns#")
|
||||
.chain(|| eats_until(&mut matcher,
|
||||
b"http: //www.w3.org/1999/02/22-rdf-syntax-ns#",
|
||||
b"http: //purl.org/rss/1.0/")) {
|
||||
b"http://www.w3.org/1999/02/22-rdf-syntax-ns#",
|
||||
b"http://purl.org/rss/1.0/")) {
|
||||
Match::StartAndEnd => return Some(("application", "rss+xml")),
|
||||
Match::DidNotMatch => {},
|
||||
Match::Start => return None
|
||||
|
|
|
@ -434,7 +434,14 @@ fn test_sniff_utf_8_bom() {
|
|||
|
||||
#[test]
|
||||
fn test_sniff_rss_feed() {
|
||||
// RSS feeds
|
||||
test_sniff_full(&PathBuf::from("text/xml/feed.rss"), "application", "rss+xml", Some(("text", "html")));
|
||||
test_sniff_full(&PathBuf::from("text/xml/rdf_rss.xml"), "application", "rss+xml", Some(("text", "html")));
|
||||
// Not RSS feeds
|
||||
test_sniff_full(&PathBuf::from("text/xml/rdf_rss_ko_1.xml"), "text", "html", Some(("text", "html")));
|
||||
test_sniff_full(&PathBuf::from("text/xml/rdf_rss_ko_2.xml"), "text", "html", Some(("text", "html")));
|
||||
test_sniff_full(&PathBuf::from("text/xml/rdf_rss_ko_3.xml"), "text", "html", Some(("text", "html")));
|
||||
test_sniff_full(&PathBuf::from("text/xml/rdf_rss_ko_4.xml"), "text", "html", Some(("text", "html")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
7
tests/unit/net/parsable_mime/text/xml/rdf_rss.xml
Normal file
7
tests/unit/net/parsable_mime/text/xml/rdf_rss.xml
Normal file
|
@ -0,0 +1,7 @@
|
|||
<!-- Good format for a "RDF feed" -->
|
||||
<?xml version="1.0"?>
|
||||
<rdf:RDF
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
>
|
||||
</rdf:RDF>
|
7
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_1.xml
Normal file
7
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_1.xml
Normal file
|
@ -0,0 +1,7 @@
|
|||
<!-- Bad format for a "RDF feed" (space between "rdf:" and "RDF") -->
|
||||
<?xml version="1.0"?>
|
||||
<rdf: RDF
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
>
|
||||
</rdf:RDF>
|
3
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_2.xml
Normal file
3
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_2.xml
Normal file
|
@ -0,0 +1,3 @@
|
|||
<!-- Bad format for a "RDF feed" (2 missing URLs) -->
|
||||
<?xml version="1.0"?>
|
||||
<rdf:RDF/>
|
6
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_3.xml
Normal file
6
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_3.xml
Normal file
|
@ -0,0 +1,6 @@
|
|||
<!-- Bad format for a "RDF feed" (one missing URL) -->
|
||||
<?xml version="1.0"?>
|
||||
<rdf:RDF
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
>
|
||||
</rdf:RDF>
|
7
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_4.xml
Normal file
7
tests/unit/net/parsable_mime/text/xml/rdf_rss_ko_4.xml
Normal file
|
@ -0,0 +1,7 @@
|
|||
<!-- Bad format for a "RDF feed" (unexpected space in first URL) -->
|
||||
<?xml version="1.0"?>
|
||||
<rdf:RDF
|
||||
xmlns:rdf="http: //www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
>
|
||||
</rdf:RDF>
|
Loading…
Add table
Add a link
Reference in a new issue