diff --git a/Cargo.lock b/Cargo.lock index 871b860a24d..c0ff53528f0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4910,6 +4910,7 @@ dependencies = [ "cookie", "crossbeam-channel", "cssparser", + "data-url", "deny_public_fields", "devtools_traits", "dom_struct", diff --git a/components/script/Cargo.toml b/components/script/Cargo.toml index d155919ba12..3a3767906a1 100644 --- a/components/script/Cargo.toml +++ b/components/script/Cargo.toml @@ -43,6 +43,7 @@ content-security-policy = { version = "0.4.0", features = ["serde"] } cookie = "0.11" crossbeam-channel = "0.4" cssparser = "0.27" +data-url = "0.1.0" deny_public_fields = { path = "../deny_public_fields" } devtools_traits = { path = "../devtools_traits" } dom_struct = { path = "../dom_struct" } diff --git a/components/script/dom/headers.rs b/components/script/dom/headers.rs index b9198d8d4f2..03fecd79c5a 100644 --- a/components/script/dom/headers.rs +++ b/components/script/dom/headers.rs @@ -10,8 +10,9 @@ use crate::dom::bindings::reflector::{reflect_dom_object, Reflector}; use crate::dom::bindings::root::DomRoot; use crate::dom::bindings::str::{is_token, ByteString}; use crate::dom::globalscope::GlobalScope; +use data_url::mime::Mime as DataUrlMime; use dom_struct::dom_struct; -use http::header::{self, HeaderMap as HyperHeaders, HeaderName, HeaderValue}; +use http::header::{HeaderMap as HyperHeaders, HeaderName, HeaderValue}; use net_traits::request::is_cors_safelisted_request_header; use std::cell::Cell; use std::str::{self, FromStr}; @@ -269,10 +270,7 @@ impl Headers { // https://fetch.spec.whatwg.org/#concept-header-extract-mime-type pub fn extract_mime_type(&self) -> Vec { - self.header_list - .borrow() - .get(header::CONTENT_TYPE) - .map_or(vec![], |v| v.as_bytes().to_owned()) + extract_mime_type(&*self.header_list.borrow()).unwrap_or(vec![]) } pub fn sort_header_list(&self) -> Vec<(String, Vec)> { @@ -469,3 +467,72 @@ pub fn is_obs_text(x: u8) -> bool { _ => false, } } + +// https://fetch.spec.whatwg.org/#concept-header-extract-mime-type +// This function uses data_url::Mime to parse the MIME Type because +// mime::Mime does not provide a parser following the Fetch spec +// see https://github.com/hyperium/mime/issues/106 +pub fn extract_mime_type(headers: &HyperHeaders) -> Option> { + let mut charset: Option = None; + let mut essence: String = "".to_string(); + let mut mime_type: Option = None; + + // Step 4 + let headers_values = headers.get_all(http::header::CONTENT_TYPE).iter(); + + // Step 5 + if headers_values.size_hint() == (0, Some(0)) { + return None; + } + + // Step 6 + for header_value in headers_values { + // Step 6.1 + match DataUrlMime::from_str(header_value.to_str().unwrap_or("")) { + // Step 6.2 + Err(_) => continue, + Ok(temp_mime) => { + let temp_essence = format!("{}/{}", temp_mime.type_, temp_mime.subtype); + + // Step 6.2 + if temp_essence == "*/*" { + continue; + } + + let temp_charset = &temp_mime.get_parameter("charset"); + + // Step 6.3 + mime_type = Some(DataUrlMime { + type_: temp_mime.type_.to_string(), + subtype: temp_mime.subtype.to_string(), + parameters: temp_mime.parameters.clone(), + }); + + // Step 6.4 + if temp_essence != essence { + charset = temp_charset.map(|c| c.to_string()); + essence = temp_essence.to_owned(); + } else { + // Step 6.5 + if temp_charset.is_none() && charset.is_some() { + let DataUrlMime { + type_: t, + subtype: st, + parameters: p, + } = mime_type.unwrap(); + let mut params = p; + params.push(("charset".to_string(), charset.clone().unwrap())); + mime_type = Some(DataUrlMime { + type_: t.to_string(), + subtype: st.to_string(), + parameters: params, + }) + } + } + }, + } + } + + // Step 7, 8 + return mime_type.map(|m| format!("{}", m).into_bytes()); +} diff --git a/components/script/dom/xmlhttprequest.rs b/components/script/dom/xmlhttprequest.rs index bc3a401208b..626f370316e 100644 --- a/components/script/dom/xmlhttprequest.rs +++ b/components/script/dom/xmlhttprequest.rs @@ -22,7 +22,7 @@ use crate::dom::document::{Document, HasBrowsingContext, IsHTMLDocument}; use crate::dom::event::{Event, EventBubbles, EventCancelable}; use crate::dom::eventtarget::EventTarget; use crate::dom::globalscope::GlobalScope; -use crate::dom::headers::is_forbidden_header_name; +use crate::dom::headers::{extract_mime_type, is_forbidden_header_name}; use crate::dom::node::Node; use crate::dom::performanceresourcetiming::InitiatorType; use crate::dom::progressevent::ProgressEvent; @@ -1364,7 +1364,7 @@ impl XMLHttpRequest { // Caching: if we have existing response xml, redirect it directly let response = self.response_xml.get(); if response.is_some() { - return self.response_xml.get(); + return response; } // Step 1 @@ -1372,41 +1372,59 @@ impl XMLHttpRequest { return None; } + // Step 2 let mime_type = self.final_mime_type(); - // TODO: prescan the response to determine encoding if final charset is null + // Step 5.3, 7 let charset = self.final_charset().unwrap_or(UTF_8); let temp_doc: DomRoot; match mime_type { Some(ref mime) if mime.type_() == mime::TEXT && mime.subtype() == mime::HTML => { - // Step 5 + // Step 4 if self.response_type.get() == XMLHttpRequestResponseType::_empty { return None; } else { - // Step 6 + // TODO Step 5.2 "If charset is null, prescan the first 1024 bytes of xhr’s received bytes" + // Step 5 temp_doc = self.document_text_html(); } }, // Step 7 - Some(ref mime) - if (mime.type_() == mime::TEXT && mime.subtype() == mime::XML) || - (mime.type_() == mime::APPLICATION && mime.subtype() == mime::XML) => - { - temp_doc = self.handle_xml(); - } None => { temp_doc = self.handle_xml(); + // Not sure it the parser should throw an error for this case + // The specification does not indicates this test, + // but for now we check the document has no child nodes + let has_no_child_nodes = temp_doc.upcast::().children().next().is_none(); + if has_no_child_nodes { + return None; + } }, - Some(ref mime) if mime.suffix() == Some(mime::XML) => { + Some(ref mime) + if (mime.type_() == mime::TEXT && mime.subtype() == mime::XML) || + (mime.type_() == mime::APPLICATION && mime.subtype() == mime::XML) || + mime.suffix() == Some(mime::XML) => + { temp_doc = self.handle_xml(); - }, - // Step 4 + // Not sure it the parser should throw an error for this case + // The specification does not indicates this test, + // but for now we check the document has no child nodes + let has_no_child_nodes = temp_doc.upcast::().children().next().is_none(); + if has_no_child_nodes { + return None; + } + } + // Step 3 _ => { return None; }, } - // Step 9 + // Step 8 temp_doc.set_encoding(charset); - // Step 13 + + // Step 9 to 11 + // Done by handle_text_html and handle_xml + + // Step 12 self.response_xml.set(Some(&temp_doc)); return self.response_xml.get(); } @@ -1585,14 +1603,25 @@ impl XMLHttpRequest { } } + /// + fn response_mime_type(&self) -> Option { + return extract_mime_type(&self.response_headers.borrow()) + .map(|mime_as_bytes| { + String::from_utf8(mime_as_bytes) + .unwrap_or_default() + .parse() + .ok() + }) + .flatten() + .or(Some(mime::TEXT_XML)); + } + + /// fn final_mime_type(&self) -> Option { if self.override_mime_type.borrow().is_some() { self.override_mime_type.borrow().clone() } else { - match self.response_headers.borrow().typed_get::() { - Some(ct) => Some(ct.into()), - None => None, - } + return self.response_mime_type(); } } } diff --git a/tests/wpt/metadata/fetch/content-type/response.window.js.ini b/tests/wpt/metadata/fetch/content-type/response.window.js.ini index 3860befbc3a..8d48307a7c4 100644 --- a/tests/wpt/metadata/fetch/content-type/response.window.js.ini +++ b/tests/wpt/metadata/fetch/content-type/response.window.js.ini @@ -96,9 +96,6 @@ [Request: combined response Content-Type: text/plain;charset=gbk text/plain] expected: NOTRUN - [fetch(): separate response Content-Type: text/plain] - expected: FAIL - [fetch(): combined response Content-Type: text/html;" \\" text/plain ";charset=GBK] expected: NOTRUN @@ -312,21 +309,6 @@ [fetch(): separate response Content-Type: text/plain ] expected: NOTRUN - [