Use data_url::Mime to parse the MIME Types

This commit follows the specification
https://fetch.spec.whatwg.org/#concept-header-extract-mime-type
This commit is contained in:
Vincent Ricard 2020-10-29 18:03:22 +01:00
parent 15a435a7ae
commit 43b3d93aec
10 changed files with 147 additions and 60 deletions

1
Cargo.lock generated
View file

@ -4910,6 +4910,7 @@ dependencies = [
"cookie",
"crossbeam-channel",
"cssparser",
"data-url",
"deny_public_fields",
"devtools_traits",
"dom_struct",

View file

@ -43,6 +43,7 @@ content-security-policy = { version = "0.4.0", features = ["serde"] }
cookie = "0.11"
crossbeam-channel = "0.4"
cssparser = "0.27"
data-url = "0.1.0"
deny_public_fields = { path = "../deny_public_fields" }
devtools_traits = { path = "../devtools_traits" }
dom_struct = { path = "../dom_struct" }

View file

@ -10,8 +10,9 @@ use crate::dom::bindings::reflector::{reflect_dom_object, Reflector};
use crate::dom::bindings::root::DomRoot;
use crate::dom::bindings::str::{is_token, ByteString};
use crate::dom::globalscope::GlobalScope;
use data_url::mime::Mime as DataUrlMime;
use dom_struct::dom_struct;
use http::header::{self, HeaderMap as HyperHeaders, HeaderName, HeaderValue};
use http::header::{HeaderMap as HyperHeaders, HeaderName, HeaderValue};
use net_traits::request::is_cors_safelisted_request_header;
use std::cell::Cell;
use std::str::{self, FromStr};
@ -269,10 +270,7 @@ impl Headers {
// https://fetch.spec.whatwg.org/#concept-header-extract-mime-type
pub fn extract_mime_type(&self) -> Vec<u8> {
self.header_list
.borrow()
.get(header::CONTENT_TYPE)
.map_or(vec![], |v| v.as_bytes().to_owned())
extract_mime_type(&*self.header_list.borrow()).unwrap_or(vec![])
}
pub fn sort_header_list(&self) -> Vec<(String, Vec<u8>)> {
@ -469,3 +467,72 @@ pub fn is_obs_text(x: u8) -> bool {
_ => false,
}
}
/// Extracts a MIME type from the `Content-Type` values of `headers`.
///
/// <https://fetch.spec.whatwg.org/#concept-header-extract-mime-type>
///
/// Returns the serialized MIME type as bytes, or `None` when no
/// `Content-Type` value parses to a MIME type other than `*/*`.
///
/// This function uses data_url::Mime to parse the MIME Type because
/// mime::Mime does not provide a parser following the Fetch spec
/// see https://github.com/hyperium/mime/issues/106
pub fn extract_mime_type(headers: &HyperHeaders) -> Option<Vec<u8>> {
    // Steps 1 to 3
    let mut charset: Option<String> = None;
    let mut essence = String::new();
    let mut mime_type: Option<DataUrlMime> = None;

    // Steps 4 to 6
    // When there is no Content-Type value the loop body never runs, so
    // `mime_type` stays `None` and the function returns `None` (Steps 5 and 7).
    // NOTE(review): every header value is parsed as a whole; a single value
    // carrying several comma-separated MIME types is not split here. Confirm
    // against the "get, decode, and split" requirement of Step 4.
    for header_value in headers.get_all(http::header::CONTENT_TYPE).iter() {
        // Step 6.1
        // A value rejected by `to_str` is handed to the parser as an empty string.
        let mut temp_mime = match DataUrlMime::from_str(header_value.to_str().unwrap_or("")) {
            Ok(parsed) => parsed,
            // Step 6.2
            Err(_) => continue,
        };
        let temp_essence = format!("{}/{}", temp_mime.type_, temp_mime.subtype);
        // Step 6.2
        if temp_essence == "*/*" {
            continue;
        }
        // Copy the charset out so that `temp_mime` can be mutated and moved below.
        let temp_charset = temp_mime.get_parameter("charset").map(|c| c.to_string());

        if temp_essence != essence {
            // Step 6.4
            charset = temp_charset;
            essence = temp_essence;
        } else if temp_charset.is_none() {
            // Step 6.5: same essence without a charset inherits the remembered one.
            if let Some(ref inherited) = charset {
                temp_mime
                    .parameters
                    .push(("charset".to_string(), inherited.clone()));
            }
        }

        // Step 6.3
        // Performed last so the parsed value can be moved instead of rebuilt
        // field by field; nothing above reads `mime_type`.
        mime_type = Some(temp_mime);
    }

    // Step 7, 8
    mime_type.map(|m| m.to_string().into_bytes())
}

View file

@ -22,7 +22,7 @@ use crate::dom::document::{Document, HasBrowsingContext, IsHTMLDocument};
use crate::dom::event::{Event, EventBubbles, EventCancelable};
use crate::dom::eventtarget::EventTarget;
use crate::dom::globalscope::GlobalScope;
use crate::dom::headers::is_forbidden_header_name;
use crate::dom::headers::{extract_mime_type, is_forbidden_header_name};
use crate::dom::node::Node;
use crate::dom::performanceresourcetiming::InitiatorType;
use crate::dom::progressevent::ProgressEvent;
@ -1364,7 +1364,7 @@ impl XMLHttpRequest {
// Caching: if we have existing response xml, redirect it directly
let response = self.response_xml.get();
if response.is_some() {
return self.response_xml.get();
return response;
}
// Step 1
@ -1372,41 +1372,59 @@ impl XMLHttpRequest {
return None;
}
// Step 2
let mime_type = self.final_mime_type();
// TODO: prescan the response to determine encoding if final charset is null
// Step 5.3, 7
let charset = self.final_charset().unwrap_or(UTF_8);
let temp_doc: DomRoot<Document>;
match mime_type {
Some(ref mime) if mime.type_() == mime::TEXT && mime.subtype() == mime::HTML => {
// Step 5
// Step 4
if self.response_type.get() == XMLHttpRequestResponseType::_empty {
return None;
} else {
// Step 6
// TODO Step 5.2 "If charset is null, prescan the first 1024 bytes of xhrs received bytes"
// Step 5
temp_doc = self.document_text_html();
}
},
// Step 7
Some(ref mime)
if (mime.type_() == mime::TEXT && mime.subtype() == mime::XML) ||
(mime.type_() == mime::APPLICATION && mime.subtype() == mime::XML) =>
{
temp_doc = self.handle_xml();
}
None => {
temp_doc = self.handle_xml();
// Not sure if the parser should throw an error for this case
// The specification does not indicate this test,
// but for now we check the document has no child nodes
let has_no_child_nodes = temp_doc.upcast::<Node>().children().next().is_none();
if has_no_child_nodes {
return None;
}
},
Some(ref mime) if mime.suffix() == Some(mime::XML) => {
Some(ref mime)
if (mime.type_() == mime::TEXT && mime.subtype() == mime::XML) ||
(mime.type_() == mime::APPLICATION && mime.subtype() == mime::XML) ||
mime.suffix() == Some(mime::XML) =>
{
temp_doc = self.handle_xml();
},
// Step 4
// Not sure if the parser should throw an error for this case
// The specification does not indicate this test,
// but for now we check the document has no child nodes
let has_no_child_nodes = temp_doc.upcast::<Node>().children().next().is_none();
if has_no_child_nodes {
return None;
}
}
// Step 3
_ => {
return None;
},
}
// Step 9
// Step 8
temp_doc.set_encoding(charset);
// Step 13
// Step 9 to 11
// Done by handle_text_html and handle_xml
// Step 12
self.response_xml.set(Some(&temp_doc));
return self.response_xml.get();
}
@ -1585,14 +1603,25 @@ impl XMLHttpRequest {
}
}
/// <https://xhr.spec.whatwg.org/#response-mime-type>
///
/// Extracts the MIME type from the response headers and parses it into a
/// `Mime`. Falls back to `text/xml` when no MIME type can be extracted or
/// when the extracted value does not parse, so the result is always `Some`.
fn response_mime_type(&self) -> Option<Mime> {
    extract_mime_type(&self.response_headers.borrow())
        .and_then(|mime_as_bytes| {
            // Bytes that are not valid UTF-8 become an empty string, which is
            // then given to the `Mime` parser like any other value.
            String::from_utf8(mime_as_bytes)
                .unwrap_or_default()
                .parse::<Mime>()
                .ok()
        })
        .or(Some(mime::TEXT_XML))
}
/// <https://xhr.spec.whatwg.org/#final-mime-type>
fn final_mime_type(&self) -> Option<Mime> {
if self.override_mime_type.borrow().is_some() {
self.override_mime_type.borrow().clone()
} else {
match self.response_headers.borrow().typed_get::<ContentType>() {
Some(ct) => Some(ct.into()),
None => None,
}
return self.response_mime_type();
}
}
}

View file

@ -96,9 +96,6 @@
[Request: combined response Content-Type: text/plain;charset=gbk text/plain]
expected: NOTRUN
[fetch(): separate response Content-Type: text/plain]
expected: FAIL
[fetch(): combined response Content-Type: text/html;" \\" text/plain ";charset=GBK]
expected: NOTRUN
@ -312,21 +309,6 @@
[fetch(): separate response Content-Type: text/plain ]
expected: NOTRUN
[<iframe>: separate response Content-Type: text/plain */*;charset=gbk]
expected: FAIL
[<iframe>: separate response Content-Type: text/html */*]
expected: FAIL
[<iframe>: separate response Content-Type: text/html;x=" text/plain]
expected: FAIL
[<iframe>: combined response Content-Type: text/html;" \\" text/plain]
expected: FAIL
[<iframe>: separate response Content-Type: text/html;" \\" text/plain]
expected: FAIL
[<iframe>: combined response Content-Type: text/html;x=" text/plain]
expected: FAIL

View file

@ -1,4 +1,16 @@
[contenttype.any.worker.html]
[Response with Content-Type "application/wasm;": compileStreaming]
expected: FAIL
[Response with Content-Type "application/wasm;": instantiateStreaming]
expected: FAIL
[Response with Content-Type "application/wasm;x": compileStreaming]
expected: FAIL
[Response with Content-Type "application/wasm;x": instantiateStreaming]
expected: FAIL
[contenttype.any.sharedworker.html]
expected: ERROR
@ -7,6 +19,18 @@
[contenttype.any.html]
[Response with Content-Type "application/wasm;": compileStreaming]
expected: FAIL
[Response with Content-Type "application/wasm;": instantiateStreaming]
expected: FAIL
[Response with Content-Type "application/wasm;x": compileStreaming]
expected: FAIL
[Response with Content-Type "application/wasm;x": instantiateStreaming]
expected: FAIL
[contenttype.any.serviceworker.html]
expected: ERROR

View file

@ -1,10 +1,5 @@
[overridemimetype-blob.html]
type: testharness
[Use text/xml as fallback MIME type]
expected: FAIL
[Use text/xml as fallback MIME type, 2]
expected: FAIL
[Bogus MIME type should end up as application/octet-stream]
expected: FAIL

View file

@ -1,5 +0,0 @@
[responsexml-basic.htm]
type: testharness
[responseXML on empty response documents]
expected: FAIL

View file

@ -1,4 +0,0 @@
[responsexml-media-type.htm]
[XMLHttpRequest: responseXML MIME type tests ('text/plain;+xml', should not parse)]
expected: FAIL

View file

@ -1,8 +1,5 @@
[responsexml-non-well-formed.htm]
type: testharness
[XMLHttpRequest: responseXML non well-formed tests]
expected: FAIL
[XMLHttpRequest: responseXML non well-formed tests 1]
expected: FAIL