Use data_url::Mime to parse the MIME Types

This commit follows the specification
https://fetch.spec.whatwg.org/#concept-header-extract-mime-type
This commit is contained in:
Vincent Ricard 2020-10-29 18:03:22 +01:00
parent 15a435a7ae
commit 43b3d93aec
10 changed files with 147 additions and 60 deletions

1
Cargo.lock generated
View file

@ -4910,6 +4910,7 @@ dependencies = [
"cookie", "cookie",
"crossbeam-channel", "crossbeam-channel",
"cssparser", "cssparser",
"data-url",
"deny_public_fields", "deny_public_fields",
"devtools_traits", "devtools_traits",
"dom_struct", "dom_struct",

View file

@ -43,6 +43,7 @@ content-security-policy = { version = "0.4.0", features = ["serde"] }
cookie = "0.11" cookie = "0.11"
crossbeam-channel = "0.4" crossbeam-channel = "0.4"
cssparser = "0.27" cssparser = "0.27"
data-url = "0.1.0"
deny_public_fields = { path = "../deny_public_fields" } deny_public_fields = { path = "../deny_public_fields" }
devtools_traits = { path = "../devtools_traits" } devtools_traits = { path = "../devtools_traits" }
dom_struct = { path = "../dom_struct" } dom_struct = { path = "../dom_struct" }

View file

@ -10,8 +10,9 @@ use crate::dom::bindings::reflector::{reflect_dom_object, Reflector};
use crate::dom::bindings::root::DomRoot; use crate::dom::bindings::root::DomRoot;
use crate::dom::bindings::str::{is_token, ByteString}; use crate::dom::bindings::str::{is_token, ByteString};
use crate::dom::globalscope::GlobalScope; use crate::dom::globalscope::GlobalScope;
use data_url::mime::Mime as DataUrlMime;
use dom_struct::dom_struct; use dom_struct::dom_struct;
use http::header::{self, HeaderMap as HyperHeaders, HeaderName, HeaderValue}; use http::header::{HeaderMap as HyperHeaders, HeaderName, HeaderValue};
use net_traits::request::is_cors_safelisted_request_header; use net_traits::request::is_cors_safelisted_request_header;
use std::cell::Cell; use std::cell::Cell;
use std::str::{self, FromStr}; use std::str::{self, FromStr};
@ -269,10 +270,7 @@ impl Headers {
// https://fetch.spec.whatwg.org/#concept-header-extract-mime-type // https://fetch.spec.whatwg.org/#concept-header-extract-mime-type
pub fn extract_mime_type(&self) -> Vec<u8> { pub fn extract_mime_type(&self) -> Vec<u8> {
self.header_list extract_mime_type(&*self.header_list.borrow()).unwrap_or(vec![])
.borrow()
.get(header::CONTENT_TYPE)
.map_or(vec![], |v| v.as_bytes().to_owned())
} }
pub fn sort_header_list(&self) -> Vec<(String, Vec<u8>)> { pub fn sort_header_list(&self) -> Vec<(String, Vec<u8>)> {
@ -469,3 +467,72 @@ pub fn is_obs_text(x: u8) -> bool {
_ => false, _ => false,
} }
} }
/// Extracts a MIME type from the `Content-Type` values of `headers`.
///
/// <https://fetch.spec.whatwg.org/#concept-header-extract-mime-type>
///
/// Parsing is delegated to `data_url::mime::Mime` because `mime::Mime` does not
/// provide a parser following the Fetch spec,
/// see <https://github.com/hyperium/mime/issues/106>.
///
/// Returns the serialized MIME type as bytes, or `None` when no `Content-Type`
/// value parses to a MIME type other than `*/*` (which includes the case where
/// the header is absent).
pub fn extract_mime_type(headers: &HyperHeaders) -> Option<Vec<u8>> {
    let mut charset: Option<String> = None;
    let mut essence = String::new();
    let mut mime_type: Option<DataUrlMime> = None;

    // Steps 4, 5: when there is no `Content-Type` value the loop below never
    // runs and `mime_type` stays `None`, which is the failure result.
    // Step 6
    for header_value in headers.get_all(http::header::CONTENT_TYPE).iter() {
        // Step 6.1: a value that `to_str` cannot convert is replaced by the
        // empty string before being handed to the parser.
        let mut temp_mime = match DataUrlMime::from_str(header_value.to_str().unwrap_or("")) {
            Ok(mime) => mime,
            // Step 6.2
            Err(_) => continue,
        };
        let temp_essence = format!("{}/{}", temp_mime.type_, temp_mime.subtype);
        // Step 6.2
        if temp_essence == "*/*" {
            continue;
        }
        // Copy the charset out so `temp_mime` can be mutated and moved below.
        let temp_charset = temp_mime.get_parameter("charset").map(|c| c.to_string());
        if temp_essence != essence {
            // Step 6.4: a new essence resets the remembered charset.
            charset = temp_charset;
            essence = temp_essence;
        } else if temp_charset.is_none() {
            // Step 6.5: same essence without its own charset inherits the
            // charset remembered from an earlier value.
            if let Some(ref previous) = charset {
                temp_mime
                    .parameters
                    .push(("charset".to_string(), previous.clone()));
            }
        }
        // Step 6.3
        mime_type = Some(temp_mime);
    }
    // Steps 7, 8
    mime_type.map(|m| m.to_string().into_bytes())
}

View file

@ -22,7 +22,7 @@ use crate::dom::document::{Document, HasBrowsingContext, IsHTMLDocument};
use crate::dom::event::{Event, EventBubbles, EventCancelable}; use crate::dom::event::{Event, EventBubbles, EventCancelable};
use crate::dom::eventtarget::EventTarget; use crate::dom::eventtarget::EventTarget;
use crate::dom::globalscope::GlobalScope; use crate::dom::globalscope::GlobalScope;
use crate::dom::headers::is_forbidden_header_name; use crate::dom::headers::{extract_mime_type, is_forbidden_header_name};
use crate::dom::node::Node; use crate::dom::node::Node;
use crate::dom::performanceresourcetiming::InitiatorType; use crate::dom::performanceresourcetiming::InitiatorType;
use crate::dom::progressevent::ProgressEvent; use crate::dom::progressevent::ProgressEvent;
@ -1364,7 +1364,7 @@ impl XMLHttpRequest {
// Caching: if we have existing response xml, redirect it directly // Caching: if we have existing response xml, redirect it directly
let response = self.response_xml.get(); let response = self.response_xml.get();
if response.is_some() { if response.is_some() {
return self.response_xml.get(); return response;
} }
// Step 1 // Step 1
@ -1372,41 +1372,59 @@ impl XMLHttpRequest {
return None; return None;
} }
// Step 2
let mime_type = self.final_mime_type(); let mime_type = self.final_mime_type();
// TODO: prescan the response to determine encoding if final charset is null // Step 5.3, 7
let charset = self.final_charset().unwrap_or(UTF_8); let charset = self.final_charset().unwrap_or(UTF_8);
let temp_doc: DomRoot<Document>; let temp_doc: DomRoot<Document>;
match mime_type { match mime_type {
Some(ref mime) if mime.type_() == mime::TEXT && mime.subtype() == mime::HTML => { Some(ref mime) if mime.type_() == mime::TEXT && mime.subtype() == mime::HTML => {
// Step 5 // Step 4
if self.response_type.get() == XMLHttpRequestResponseType::_empty { if self.response_type.get() == XMLHttpRequestResponseType::_empty {
return None; return None;
} else { } else {
// Step 6 // TODO Step 5.2 "If charset is null, prescan the first 1024 bytes of xhrs received bytes"
// Step 5
temp_doc = self.document_text_html(); temp_doc = self.document_text_html();
} }
}, },
// Step 7 // Step 7
Some(ref mime)
if (mime.type_() == mime::TEXT && mime.subtype() == mime::XML) ||
(mime.type_() == mime::APPLICATION && mime.subtype() == mime::XML) =>
{
temp_doc = self.handle_xml();
}
None => { None => {
temp_doc = self.handle_xml(); temp_doc = self.handle_xml();
// Not sure it the parser should throw an error for this case
// The specification does not indicates this test,
// but for now we check the document has no child nodes
let has_no_child_nodes = temp_doc.upcast::<Node>().children().next().is_none();
if has_no_child_nodes {
return None;
}
}, },
Some(ref mime) if mime.suffix() == Some(mime::XML) => { Some(ref mime)
if (mime.type_() == mime::TEXT && mime.subtype() == mime::XML) ||
(mime.type_() == mime::APPLICATION && mime.subtype() == mime::XML) ||
mime.suffix() == Some(mime::XML) =>
{
temp_doc = self.handle_xml(); temp_doc = self.handle_xml();
}, // Not sure it the parser should throw an error for this case
// Step 4 // The specification does not indicates this test,
// but for now we check the document has no child nodes
let has_no_child_nodes = temp_doc.upcast::<Node>().children().next().is_none();
if has_no_child_nodes {
return None;
}
}
// Step 3
_ => { _ => {
return None; return None;
}, },
} }
// Step 9 // Step 8
temp_doc.set_encoding(charset); temp_doc.set_encoding(charset);
// Step 13
// Step 9 to 11
// Done by handle_text_html and handle_xml
// Step 12
self.response_xml.set(Some(&temp_doc)); self.response_xml.set(Some(&temp_doc));
return self.response_xml.get(); return self.response_xml.get();
} }
@ -1585,14 +1603,25 @@ impl XMLHttpRequest {
} }
} }
/// <https://xhr.spec.whatwg.org/#response-mime-type>
///
/// Extracts the MIME type from the response headers and converts it to a
/// `Mime`. Falls back to `text/xml` when extraction yields nothing, when the
/// extracted bytes are not valid UTF-8, or when they do not parse as a `Mime`,
/// so the result is always `Some`.
fn response_mime_type(&self) -> Option<Mime> {
    extract_mime_type(&self.response_headers.borrow())
        .and_then(|mime_as_bytes| String::from_utf8(mime_as_bytes).ok())
        .and_then(|mime_as_str| mime_as_str.parse().ok())
        .or(Some(mime::TEXT_XML))
}
/// <https://xhr.spec.whatwg.org/#final-mime-type>
fn final_mime_type(&self) -> Option<Mime> { fn final_mime_type(&self) -> Option<Mime> {
if self.override_mime_type.borrow().is_some() { if self.override_mime_type.borrow().is_some() {
self.override_mime_type.borrow().clone() self.override_mime_type.borrow().clone()
} else { } else {
match self.response_headers.borrow().typed_get::<ContentType>() { return self.response_mime_type();
Some(ct) => Some(ct.into()),
None => None,
}
} }
} }
} }

View file

@ -96,9 +96,6 @@
[Request: combined response Content-Type: text/plain;charset=gbk text/plain] [Request: combined response Content-Type: text/plain;charset=gbk text/plain]
expected: NOTRUN expected: NOTRUN
[fetch(): separate response Content-Type: text/plain]
expected: FAIL
[fetch(): combined response Content-Type: text/html;" \\" text/plain ";charset=GBK] [fetch(): combined response Content-Type: text/html;" \\" text/plain ";charset=GBK]
expected: NOTRUN expected: NOTRUN
@ -312,21 +309,6 @@
[fetch(): separate response Content-Type: text/plain ] [fetch(): separate response Content-Type: text/plain ]
expected: NOTRUN expected: NOTRUN
[<iframe>: separate response Content-Type: text/plain */*;charset=gbk]
expected: FAIL
[<iframe>: separate response Content-Type: text/html */*]
expected: FAIL
[<iframe>: separate response Content-Type: text/html;x=" text/plain]
expected: FAIL
[<iframe>: combined response Content-Type: text/html;" \\" text/plain]
expected: FAIL
[<iframe>: separate response Content-Type: text/html;" \\" text/plain]
expected: FAIL
[<iframe>: combined response Content-Type: text/html;x=" text/plain] [<iframe>: combined response Content-Type: text/html;x=" text/plain]
expected: FAIL expected: FAIL

View file

@ -1,4 +1,16 @@
[contenttype.any.worker.html] [contenttype.any.worker.html]
[Response with Content-Type "application/wasm;": compileStreaming]
expected: FAIL
[Response with Content-Type "application/wasm;": instantiateStreaming]
expected: FAIL
[Response with Content-Type "application/wasm;x": compileStreaming]
expected: FAIL
[Response with Content-Type "application/wasm;x": instantiateStreaming]
expected: FAIL
[contenttype.any.sharedworker.html] [contenttype.any.sharedworker.html]
expected: ERROR expected: ERROR
@ -7,6 +19,18 @@
[contenttype.any.html] [contenttype.any.html]
[Response with Content-Type "application/wasm;": compileStreaming]
expected: FAIL
[Response with Content-Type "application/wasm;": instantiateStreaming]
expected: FAIL
[Response with Content-Type "application/wasm;x": compileStreaming]
expected: FAIL
[Response with Content-Type "application/wasm;x": instantiateStreaming]
expected: FAIL
[contenttype.any.serviceworker.html] [contenttype.any.serviceworker.html]
expected: ERROR expected: ERROR

View file

@ -1,10 +1,5 @@
[overridemimetype-blob.html] [overridemimetype-blob.html]
type: testharness type: testharness
[Use text/xml as fallback MIME type]
expected: FAIL
[Use text/xml as fallback MIME type, 2]
expected: FAIL
[Bogus MIME type should end up as application/octet-stream] [Bogus MIME type should end up as application/octet-stream]
expected: FAIL expected: FAIL

View file

@ -1,5 +0,0 @@
[responsexml-basic.htm]
type: testharness
[responseXML on empty response documents]
expected: FAIL

View file

@ -1,4 +0,0 @@
[responsexml-media-type.htm]
[XMLHttpRequest: responseXML MIME type tests ('text/plain;+xml', should not parse)]
expected: FAIL

View file

@ -1,8 +1,5 @@
[responsexml-non-well-formed.htm] [responsexml-non-well-formed.htm]
type: testharness type: testharness
[XMLHttpRequest: responseXML non well-formed tests]
expected: FAIL
[XMLHttpRequest: responseXML non well-formed tests 1] [XMLHttpRequest: responseXML non well-formed tests 1]
expected: FAIL expected: FAIL