From 6e97fc0bc47b6c3c5af9b1750f01f268b6b200f5 Mon Sep 17 00:00:00 2001 From: Vincent Ricard Date: Mon, 19 May 2025 13:38:01 +0200 Subject: [PATCH] Use spec compliant content-type extraction in more places and enable a `` quirk (#28321) This changes includes two semi-related things: 1. Fixes some specification compliance issues when parsing mime types and charsets for `XMLHttpRequest`. 2. Implements a `` parsing quirk involving mime types. Testing: There are tests for these changes. Signed-off-by: Martin Robinson Co-authored-by: Martin Robinson --- Cargo.lock | 1 + components/net/fetch/methods.rs | 29 +- components/script/dom/headers.rs | 73 +- components/script/dom/xmlhttprequest.rs | 157 ++-- components/script/stylesheet_loader.rs | 14 +- components/shared/net/Cargo.toml | 1 + components/shared/net/fetch/headers.rs | 93 ++- components/shared/net/response.rs | 10 +- .../single-byte-decoder.window.js.ini | 2 - .../fetch/content-type/response.window.js.ini | 111 --- .../fetch/content-type/script.window.js.ini | 57 -- tests/wpt/meta/fetch/nosniff/script.html.ini | 3 - .../meta/fetch/nosniff/stylesheet.html.ini | 6 - tests/wpt/meta/fetch/nosniff/worker.html.ini | 3 - .../charset-parameter.window.js.ini | 36 +- .../mimesniff/mime-types/parsing.any.js.ini | 780 +----------------- .../workers/importscripts_mime.any.js.ini | 3 - 17 files changed, 231 insertions(+), 1148 deletions(-) delete mode 100644 tests/wpt/meta/fetch/content-type/script.window.js.ini delete mode 100644 tests/wpt/meta/fetch/nosniff/script.html.ini delete mode 100644 tests/wpt/meta/fetch/nosniff/worker.html.ini diff --git a/Cargo.lock b/Cargo.lock index 7094e8ef378..8e56aa33ed9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4864,6 +4864,7 @@ dependencies = [ "content-security-policy", "cookie 0.18.1", "crossbeam-channel", + "data-url", "embedder_traits", "headers 0.4.0", "http 1.3.1", diff --git a/components/net/fetch/methods.rs b/components/net/fetch/methods.rs index 65c173fce3b..33d0da952fb 100644 --- a/components/net/fetch/methods.rs +++ b/components/net/fetch/methods.rs @@ -18,6 +18,7 @@ use http::{HeaderValue, Method, StatusCode}; use ipc_channel::ipc; use log::{debug, trace, warn}; use mime::{self, Mime}; +use net_traits::fetch::headers::extract_mime_type_as_mime; use net_traits::filemanager_thread::{FileTokenCheck, RelativePos}; use net_traits::http_status::HttpStatus; use net_traits::policy_container::{PolicyContainer, RequestPolicyContainer}; @@ -886,7 +887,7 @@ pub fn should_be_blocked_due_to_nosniff( // Step 2 // Note: an invalid MIME type will produce a `None`. - let content_type_header = response_headers.typed_get::(); + let mime_type = extract_mime_type_as_mime(response_headers); /// #[inline] @@ -915,16 +916,12 @@ pub fn should_be_blocked_due_to_nosniff( .any(|mime| mime.type_() == mime_type.type_() && mime.subtype() == mime_type.subtype()) } - match content_type_header { + match mime_type { // Step 4 - Some(ref ct) if destination.is_script_like() => { - !is_javascript_mime_type(&ct.clone().into()) - }, - + Some(ref mime_type) if destination.is_script_like() => !is_javascript_mime_type(mime_type), // Step 5 - Some(ref ct) if destination == Destination::Style => { - let m: mime::Mime = ct.clone().into(); - m.type_() != mime::TEXT && m.subtype() != mime::CSS + Some(ref mime_type) if destination == Destination::Style => { + mime_type.type_() != mime::TEXT && mime_type.subtype() != mime::CSS }, None if destination == Destination::Style || destination.is_script_like() => true, @@ -938,18 +935,22 @@ fn should_be_blocked_due_to_mime_type( destination: Destination, response_headers: &HeaderMap, ) -> bool { - // Step 1 - let mime_type: mime::Mime = match response_headers.typed_get::() { - Some(header) => header.into(), + // Step 1: Let mimeType be the result of extracting a MIME type from response’s header list. + let mime_type: mime::Mime = match extract_mime_type_as_mime(response_headers) { + Some(mime_type) => mime_type, + // Step 2: If mimeType is failure, then return allowed. None => return false, }; - // Step 2-3 + // Step 3: Let destination be request’s destination. + // Step 4: If destination is script-like and one of the following is true, then return blocked: + // - mimeType’s essence starts with "audio/", "image/", or "video/". + // - mimeType’s essence is "text/csv". + // Step 5: Return allowed. destination.is_script_like() && match mime_type.type_() { mime::AUDIO | mime::VIDEO | mime::IMAGE => true, mime::TEXT if mime_type.subtype() == mime::CSV => true, - // Step 4 _ => false, } } diff --git a/components/script/dom/headers.rs b/components/script/dom/headers.rs index 10a8be731bf..0e8dcf92ccd 100644 --- a/components/script/dom/headers.rs +++ b/components/script/dom/headers.rs @@ -5,12 +5,12 @@ use std::cell::Cell; use std::str::{self, FromStr}; -use data_url::mime::Mime as DataUrlMime; use dom_struct::dom_struct; use http::header::{HeaderMap as HyperHeaders, HeaderName, HeaderValue}; use js::rust::HandleObject; use net_traits::fetch::headers::{ - get_decode_and_split_header_value, get_value_from_header_list, is_forbidden_method, + extract_mime_type, get_decode_and_split_header_value, get_value_from_header_list, + is_forbidden_method, }; use net_traits::request::is_cors_safelisted_request_header; @@ -564,72 +564,3 @@ pub(crate) fn is_vchar(x: u8) -> bool { pub(crate) fn is_obs_text(x: u8) -> bool { matches!(x, 0x80..=0xFF) } - -// https://fetch.spec.whatwg.org/#concept-header-extract-mime-type -// This function uses data_url::Mime to parse the MIME Type because -// mime::Mime does not provide a parser following the Fetch spec -// see https://github.com/hyperium/mime/issues/106 -pub(crate) fn extract_mime_type(headers: &HyperHeaders) -> Option> { - let mut charset: Option = None; - let mut essence: String = "".to_string(); - let mut mime_type: Option = None; - - // Step 4 - let headers_values = headers.get_all(http::header::CONTENT_TYPE).iter(); - - // Step 5 - if headers_values.size_hint() == (0, Some(0)) { - return None; - } - - // Step 6 - for header_value in headers_values { - // Step 6.1 - match DataUrlMime::from_str(header_value.to_str().unwrap_or("")) { - // Step 6.2 - Err(_) => continue, - Ok(temp_mime) => { - let temp_essence = format!("{}/{}", temp_mime.type_, temp_mime.subtype); - - // Step 6.2 - if temp_essence == "*/*" { - continue; - } - - let temp_charset = &temp_mime.get_parameter("charset"); - - // Step 6.3 - mime_type = Some(DataUrlMime { - type_: temp_mime.type_.to_string(), - subtype: temp_mime.subtype.to_string(), - parameters: temp_mime.parameters.clone(), - }); - - // Step 6.4 - if temp_essence != essence { - charset = temp_charset.map(|c| c.to_string()); - temp_essence.clone_into(&mut essence); - } else { - // Step 6.5 - if temp_charset.is_none() && charset.is_some() { - let DataUrlMime { - type_: t, - subtype: st, - parameters: p, - } = mime_type.unwrap(); - let mut params = p; - params.push(("charset".to_string(), charset.clone().unwrap())); - mime_type = Some(DataUrlMime { - type_: t.to_string(), - subtype: st.to_string(), - parameters: params, - }) - } - } - }, - } - } - - // Step 7, 8 - mime_type.map(|m| format!("{}", m).into_bytes()) -} diff --git a/components/script/dom/xmlhttprequest.rs b/components/script/dom/xmlhttprequest.rs index ca5bb72a1dc..4e7c136f42b 100644 --- a/components/script/dom/xmlhttprequest.rs +++ b/components/script/dom/xmlhttprequest.rs @@ -26,6 +26,7 @@ use js::jsval::{JSVal, NullValue}; use js::rust::wrappers::JS_ParseJSON; use js::rust::{HandleObject, MutableHandleValue}; use js::typedarray::{ArrayBuffer, ArrayBufferU8}; +use net_traits::fetch::headers::extract_mime_type_as_dataurl_mime; use net_traits::http_status::HttpStatus; use net_traits::request::{CredentialsMode, Referrer, RequestBuilder, RequestId, RequestMode}; use net_traits::{ @@ -59,7 +60,7 @@ use crate::dom::document::{Document, DocumentSource, HasBrowsingContext, IsHTMLD use crate::dom::event::{Event, EventBubbles, EventCancelable}; use crate::dom::eventtarget::EventTarget; use crate::dom::globalscope::GlobalScope; -use crate::dom::headers::{extract_mime_type, is_forbidden_request_header}; +use crate::dom::headers::is_forbidden_request_header; use crate::dom::node::Node; use crate::dom::performanceresourcetiming::InitiatorType; use crate::dom::progressevent::ProgressEvent; @@ -1324,11 +1325,7 @@ impl XMLHttpRequest { return response; } // Step 2 - let mime = self - .final_mime_type() - .as_ref() - .map(|m| normalize_type_string(&m.to_string())) - .unwrap_or("".to_owned()); + let mime = normalize_type_string(&self.final_mime_type().to_string()); // Step 3, 4 let bytes = self.response.borrow().to_vec(); @@ -1366,64 +1363,77 @@ impl XMLHttpRequest { return response; } - // Step 1 + // Step 1: If xhr’s response’s body is null, then return. if self.response_status.get().is_err() { return None; } - // Step 2 - let mime_type = self.final_mime_type(); - // Step 5.3, 7 - let charset = self.final_charset().unwrap_or(UTF_8); - let temp_doc: DomRoot; - match mime_type { - Some(ref mime) if mime.matches(TEXT, HTML) => { - // Step 4 - if self.response_type.get() == XMLHttpRequestResponseType::_empty { - return None; - } else { - // TODO Step 5.2 "If charset is null, prescan the first 1024 bytes of xhr’s received bytes" - // Step 5 - temp_doc = self.document_text_html(can_gc); - } - }, - // Step 7 - None => { - temp_doc = self.handle_xml(can_gc); - // Not sure it the parser should throw an error for this case - // The specification does not indicates this test, - // but for now we check the document has no child nodes - let has_no_child_nodes = temp_doc.upcast::().children().next().is_none(); - if has_no_child_nodes { - return None; - } - }, - Some(ref mime) - if mime.matches(TEXT, XML) || - mime.matches(APPLICATION, XML) || - mime.has_suffix(XML) => - { - temp_doc = self.handle_xml(can_gc); - // Not sure it the parser should throw an error for this case - // The specification does not indicates this test, - // but for now we check the document has no child nodes - let has_no_child_nodes = temp_doc.upcast::().children().next().is_none(); - if has_no_child_nodes { - return None; - } - }, - // Step 3 - _ => { - return None; - }, + // Step 2: Let finalMIME be the result of get a final MIME type for xhr. + let final_mime = self.final_mime_type(); + + // Step 3: If finalMIME is not an HTML MIME type or an XML MIME type, then return. + let is_xml_mime_type = final_mime.matches(TEXT, XML) || + final_mime.matches(APPLICATION, XML) || + final_mime.has_suffix(XML); + if !final_mime.matches(TEXT, HTML) && !is_xml_mime_type { + return None; } - // Step 8 + + // Step 4: If xhr’s response type is the empty string and finalMIME is an HTML MIME + // type, then return. + let charset; + let temp_doc; + if final_mime.matches(TEXT, HTML) { + if self.response_type.get() == XMLHttpRequestResponseType::_empty { + return None; + } + + // Step 5: If finalMIME is an HTML MIME type, then: + // Step 5.1: Let charset be the result of get a final encoding for xhr. + // Step 5.2: If charset is null, prescan the first 1024 bytes of xhr’s received bytes + // and if that does not terminate unsuccessfully then let charset be the return value. + // TODO: This isn't happening right now. + // Step 5.3. If charset is null, then set charset to UTF-8. + charset = Some(self.final_charset().unwrap_or(UTF_8)); + + // Step 5.4: Let document be a document that represents the result parsing xhr’s + // received bytes following the rules set forth in the HTML Standard for an HTML parser + // with scripting disabled and a known definite encoding charset. [HTML] + temp_doc = self.document_text_html(can_gc); + } else { + assert!(is_xml_mime_type); + + // Step 6: Otherwise, let document be a document that represents the result of running + // the XML parser with XML scripting support disabled on xhr’s received bytes. If that + // fails (unsupported character encoding, namespace well-formedness error, etc.), then + // return null. [HTML] + // + // TODO: The spec seems to suggest the charset should come from the XML parser here. + temp_doc = self.handle_xml(can_gc); + charset = self.final_charset(); + + // Not sure it the parser should throw an error for this case + // The specification does not indicates this test, + // but for now we check the document has no child nodes + let has_no_child_nodes = temp_doc.upcast::().children().next().is_none(); + if has_no_child_nodes { + return None; + } + } + + // Step 7: If charset is null, then set charset to UTF-8. + let charset = charset.unwrap_or(UTF_8); + + // Step 8: Set document’s encoding to charset. temp_doc.set_encoding(charset); - // Step 9 to 11 - // Done by handle_text_html and handle_xml + // Step 9: Set document’s content type to finalMIME. + // Step 10: Set document’s URL to xhr’s response’s URL. + // Step 11: Set document’s origin to xhr’s relevant settings object’s origin. + // + // Done by `handle_text_html()` and `handle_xml()`. - // Step 12 + // Step 12: Set xhr’s response object to document. self.response_xml.set(Some(&temp_doc)); self.response_xml.get() } @@ -1507,7 +1517,7 @@ impl XMLHttpRequest { Ok(parsed) => Some(parsed), Err(_) => None, // Step 7 }; - let content_type = self.final_mime_type(); + let content_type = Some(self.final_mime_type()); Document::new( win, HasBrowsingContext::No, @@ -1598,14 +1608,16 @@ impl XMLHttpRequest { // 3. If responseMIME’s parameters["charset"] exists, then set label to it. let response_charset = self .response_mime_type() - .and_then(|mime| mime.get_parameter(CHARSET).map(|c| c.to_string())); + .get_parameter(CHARSET) + .map(ToString::to_string); // 4. If xhr’s override MIME type’s parameters["charset"] exists, then set label to it. let override_charset = self .override_mime_type .borrow() .as_ref() - .and_then(|mime| mime.get_parameter(CHARSET).map(|c| c.to_string())); + .and_then(|mime| mime.get_parameter(CHARSET)) + .map(ToString::to_string); // 5. If label is null, then return null. // 6. Let encoding be the result of getting an encoding from label. @@ -1617,23 +1629,22 @@ impl XMLHttpRequest { } /// - fn response_mime_type(&self) -> Option { - return extract_mime_type(&self.response_headers.borrow()) - .and_then(|mime_as_bytes| { - String::from_utf8(mime_as_bytes) - .unwrap_or_default() - .parse() - .ok() - }) - .or(Some(Mime::new(TEXT, XML))); + fn response_mime_type(&self) -> Mime { + // 1. Let mimeType be the result of extracting a MIME type from xhr’s response’s + // header list. + // 2. If mimeType is failure, then set mimeType to text/xml. + // 3. Return mimeType. + extract_mime_type_as_dataurl_mime(&self.response_headers.borrow()) + .unwrap_or_else(|| Mime::new(TEXT, XML)) } /// - fn final_mime_type(&self) -> Option { - match *self.override_mime_type.borrow() { - Some(ref override_mime) => Some(override_mime.clone()), - None => self.response_mime_type(), - } + fn final_mime_type(&self) -> Mime { + self.override_mime_type + .borrow() + .as_ref() + .map(MimeExt::clone) + .unwrap_or_else(|| self.response_mime_type()) } } diff --git a/components/script/stylesheet_loader.rs b/components/script/stylesheet_loader.rs index a18d63e323b..5efaf78e542 100644 --- a/components/script/stylesheet_loader.rs +++ b/components/script/stylesheet_loader.rs @@ -163,7 +163,8 @@ impl FetchResponseListener for StylesheetContext { Some(meta) => meta, None => return, }; - let is_css = metadata.content_type.is_some_and(|ct| { + + let mut is_css = metadata.content_type.is_some_and(|ct| { let mime: Mime = ct.into_inner().into(); mime.type_() == mime::TEXT && mime.subtype() == mime::CSS }) || ( @@ -177,6 +178,17 @@ impl FetchResponseListener for StylesheetContext { document.origin().immutable().clone() == metadata.final_url.origin() ); + // From : + // > Quirk: If the document has been set to quirks mode, has the same origin as + // > the URL of the external resource, and the Content-Type metadata of the + // > external resource is not a supported style sheet type, the user agent must + // > instead assume it to be text/css. + if document.quirks_mode() == QuirksMode::Quirks && + document.url().origin() == self.url.origin() + { + is_css = true; + } + let data = if is_css { let data = std::mem::take(&mut self.data); self.unminify_css(data, metadata.final_url.clone()) diff --git a/components/shared/net/Cargo.toml b/components/shared/net/Cargo.toml index 79ea936a688..0dfad486455 100644 --- a/components/shared/net/Cargo.toml +++ b/components/shared/net/Cargo.toml @@ -19,6 +19,7 @@ compositing_traits = { workspace = true } content-security-policy = { workspace = true } cookie = { workspace = true } crossbeam-channel = { workspace = true } +data-url = { workspace = true } embedder_traits = { workspace = true } headers = { workspace = true } http = { workspace = true } diff --git a/components/shared/net/fetch/headers.rs b/components/shared/net/fetch/headers.rs index 11bb68d5d0a..5ffd537adf8 100644 --- a/components/shared/net/fetch/headers.rs +++ b/components/shared/net/fetch/headers.rs @@ -3,8 +3,9 @@ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ use std::iter::Peekable; -use std::str::Chars; +use std::str::{Chars, FromStr}; +use data_url::mime::Mime as DataUrlMime; use headers::HeaderMap; /// @@ -184,3 +185,93 @@ fn collect_http_quoted_string(position: &mut Peekable, extract_value: boo // Step 6, 7 value } + +/// +/// This function uses data_url::Mime to parse the MIME Type because +/// mime::Mime does not provide a parser following the Fetch spec +/// see +pub fn extract_mime_type_as_dataurl_mime(headers: &HeaderMap) -> Option { + // > 1: Let charset be null. + let mut charset = None; + // > 2: Let essence be null. + let mut essence = String::new(); + // > 3: Let mimeType be null. + let mut mime_type = None; + + // > 4: Let values be the result of getting, decoding, and splitting `Content-Type` + // from headers. + // > 5: If values is null, then return failure. + let headers_values = get_decode_and_split_header_name("content-type", headers)?; + + // > 6: For each value of values: + for header_value in headers_values.iter() { + // > 6.1: Let temporaryMimeType be the result of parsing value. + match DataUrlMime::from_str(header_value) { + // > 6.2: If temporaryMimeType is failure or its essence is "*/*", then continue. + Err(_) => continue, + Ok(temp_mime) => { + let temp_essence = format!("{}/{}", temp_mime.type_, temp_mime.subtype); + + // > 6.2: If temporaryMimeType is failure or its essence is "*/*", then + // continue. + if temp_essence == "*/*" { + continue; + } + + // > 6.3: Set mimeType to temporaryMimeType. + mime_type = Some(DataUrlMime { + type_: temp_mime.type_.to_string(), + subtype: temp_mime.subtype.to_string(), + parameters: temp_mime.parameters.clone(), + }); + + // > 6.4: If mimeType’s essence is not essence, then: + let temp_charset = &temp_mime.get_parameter("charset"); + if temp_essence != essence { + // > 6.4.1: Set charset to null. + // > 6.4.2: If mimeType’s parameters["charset"] exists, then set + // charset to mimeType’s parameters["charset"]. + charset = temp_charset.map(|c| c.to_string()); + // > 6.4.3: Set essence to mimeType’s essence. + essence = temp_essence.to_owned(); + } else { + // > 6.5: Otherwise, if mimeType’s parameters["charset"] does not exist, + // and charset is non-null, set mimeType’s parameters["charset"] to charset. + if temp_charset.is_none() && charset.is_some() { + let DataUrlMime { + type_: t, + subtype: st, + parameters: p, + } = mime_type.unwrap(); + let mut params = p; + params.push(("charset".to_string(), charset.clone().unwrap())); + mime_type = Some(DataUrlMime { + type_: t.to_string(), + subtype: st.to_string(), + parameters: params, + }) + } + } + }, + } + } + + // > 7: If mimeType is null, then return failure. + // > 8: Return mimeType. + mime_type +} + +pub fn extract_mime_type(headers: &HeaderMap) -> Option> { + extract_mime_type_as_dataurl_mime(headers).map(|m| format!("{}", m).into_bytes()) +} + +pub fn extract_mime_type_as_mime(headers: &HeaderMap) -> Option { + extract_mime_type_as_dataurl_mime(headers).and_then(|mime: DataUrlMime| { + // Try to transform a data-url::mime::Mime into a mime::Mime + let mut mime_as_str = format!("{}/{}", mime.type_, mime.subtype); + for p in mime.parameters { + mime_as_str.push_str(format!("; {}={}", p.0, p.1).as_str()); + } + mime_as_str.parse().ok() + }) +} diff --git a/components/shared/net/response.rs b/components/shared/net/response.rs index f91993ddccb..9a01fbbf965 100644 --- a/components/shared/net/response.rs +++ b/components/shared/net/response.rs @@ -7,7 +7,6 @@ use std::sync::Mutex; use std::sync::atomic::AtomicBool; -use headers::{ContentType, HeaderMapExt}; use http::HeaderMap; use hyper_serde::Serde; use malloc_size_of_derive::MallocSizeOf; @@ -15,6 +14,7 @@ use serde::{Deserialize, Serialize}; use servo_arc::Arc; use servo_url::ServoUrl; +use crate::fetch::headers::extract_mime_type_as_mime; use crate::http_status::HttpStatus; use crate::{ FetchMetadata, FilteredMetadata, Metadata, NetworkError, ReferrerPolicy, ResourceFetchTiming, @@ -300,13 +300,7 @@ impl Response { pub fn metadata(&self) -> Result { fn init_metadata(response: &Response, url: &ServoUrl) -> Metadata { let mut metadata = Metadata::default(url.clone()); - metadata.set_content_type( - response - .headers - .typed_get::() - .map(|v| v.into()) - .as_ref(), - ); + metadata.set_content_type(extract_mime_type_as_mime(&response.headers).as_ref()); metadata.location_url.clone_from(&response.location_url); metadata.headers = Some(Serde(response.headers.clone())); metadata.status.clone_from(&response.status); diff --git a/tests/wpt/meta/encoding/single-byte-decoder.window.js.ini b/tests/wpt/meta/encoding/single-byte-decoder.window.js.ini index d71939b82b5..2ee12a08ed4 100644 --- a/tests/wpt/meta/encoding/single-byte-decoder.window.js.ini +++ b/tests/wpt/meta/encoding/single-byte-decoder.window.js.ini @@ -1,7 +1,5 @@ [single-byte-decoder.window.html?XMLHttpRequest] -[single-byte-decoder.window.html?TextDecoder] - [single-byte-decoder.window.html?document] [ISO-8859-2: iso_8859-2:1987 (document.characterSet and document.inputEncoding)] expected: FAIL diff --git a/tests/wpt/meta/fetch/content-type/response.window.js.ini b/tests/wpt/meta/fetch/content-type/response.window.js.ini index fb99623b59d..07b9bbfe08e 100644 --- a/tests/wpt/meta/fetch/content-type/response.window.js.ini +++ b/tests/wpt/meta/fetch/content-type/response.window.js.ini @@ -1,49 +1,10 @@ [response.window.html] - [fetch(): combined response Content-Type: text/plain ] - expected: FAIL - - [fetch(): separate response Content-Type: text/html;" \\" text/plain] - expected: FAIL - - [fetch(): separate response Content-Type: text/html;" text/plain] - expected: FAIL - - [