Use spec compliant content-type extraction in more places and enable a <stylesheet> quirk (#28321)

This changes includes two semi-related things: 1. Fixes some specification compliance issues when parsing mime types and charsets for `XMLHttpRequest`. 2. Implements a `<stylesheet>` parsing quirk involving mime types. Testing: There are tests for these changes. Signed-off-by: Martin Robinson <mrobinson@igalia.com> Co-authored-by: Martin Robinson <mrobinson@igalia.com>
2025-09-30 16:49:16 +01:00 · 2025-05-19 13:38:01 +02:00 · 2025-05-19 13:38:01 +02:00 · 6e97fc0bc4
commit 6e97fc0bc4
parent d8837e4a52
17 changed files with 231 additions and 1148 deletions
--- a/components/script/dom/headers.rs
+++ b/components/script/dom/headers.rs
@ -5,12 +5,12 @@
 use std::cell::Cell;
 use std::str::{self, FromStr};

-use data_url::mime::Mime as DataUrlMime;
 use dom_struct::dom_struct;
 use http::header::{HeaderMap as HyperHeaders, HeaderName, HeaderValue};
 use js::rust::HandleObject;
 use net_traits::fetch::headers::{
-    get_decode_and_split_header_value, get_value_from_header_list, is_forbidden_method,
+    extract_mime_type, get_decode_and_split_header_value, get_value_from_header_list,
+    is_forbidden_method,
 };
 use net_traits::request::is_cors_safelisted_request_header;

@ -564,72 +564,3 @@ pub(crate) fn is_vchar(x: u8) -> bool {
 pub(crate) fn is_obs_text(x: u8) -> bool {
    matches!(x, 0x80..=0xFF)
 }
-
-// https://fetch.spec.whatwg.org/#concept-header-extract-mime-type
-// This function uses data_url::Mime to parse the MIME Type because
-// mime::Mime does not provide a parser following the Fetch spec
-// see https://github.com/hyperium/mime/issues/106
-pub(crate) fn extract_mime_type(headers: &HyperHeaders) -> Option<Vec<u8>> {
-    let mut charset: Option<String> = None;
-    let mut essence: String = "".to_string();
-    let mut mime_type: Option<DataUrlMime> = None;
-
-    // Step 4
-    let headers_values = headers.get_all(http::header::CONTENT_TYPE).iter();
-
-    // Step 5
-    if headers_values.size_hint() == (0, Some(0)) {
-        return None;
-    }
-
-    // Step 6
-    for header_value in headers_values {
-        // Step 6.1
-        match DataUrlMime::from_str(header_value.to_str().unwrap_or("")) {
-            // Step 6.2
-            Err(_) => continue,
-            Ok(temp_mime) => {
-                let temp_essence = format!("{}/{}", temp_mime.type_, temp_mime.subtype);
-
-                // Step 6.2
-                if temp_essence == "*/*" {
-                    continue;
-                }
-
-                let temp_charset = &temp_mime.get_parameter("charset");
-
-                // Step 6.3
-                mime_type = Some(DataUrlMime {
-                    type_: temp_mime.type_.to_string(),
-                    subtype: temp_mime.subtype.to_string(),
-                    parameters: temp_mime.parameters.clone(),
-                });
-
-                // Step 6.4
-                if temp_essence != essence {
-                    charset = temp_charset.map(|c| c.to_string());
-                    temp_essence.clone_into(&mut essence);
-                } else {
-                    // Step 6.5
-                    if temp_charset.is_none() && charset.is_some() {
-                        let DataUrlMime {
-                            type_: t,
-                            subtype: st,
-                            parameters: p,
-                        } = mime_type.unwrap();
-                        let mut params = p;
-                        params.push(("charset".to_string(), charset.clone().unwrap()));
-                        mime_type = Some(DataUrlMime {
-                            type_: t.to_string(),
-                            subtype: st.to_string(),
-                            parameters: params,
-                        })
-                    }
-                }
-            },
-        }
-    }
-
-    // Step 7, 8
-    mime_type.map(|m| format!("{}", m).into_bytes())
-}
--- a/components/script/dom/xmlhttprequest.rs
+++ b/components/script/dom/xmlhttprequest.rs
@ -26,6 +26,7 @@ use js::jsval::{JSVal, NullValue};
 use js::rust::wrappers::JS_ParseJSON;
 use js::rust::{HandleObject, MutableHandleValue};
 use js::typedarray::{ArrayBuffer, ArrayBufferU8};
+use net_traits::fetch::headers::extract_mime_type_as_dataurl_mime;
 use net_traits::http_status::HttpStatus;
 use net_traits::request::{CredentialsMode, Referrer, RequestBuilder, RequestId, RequestMode};
 use net_traits::{
@ -59,7 +60,7 @@ use crate::dom::document::{Document, DocumentSource, HasBrowsingContext, IsHTMLD
 use crate::dom::event::{Event, EventBubbles, EventCancelable};
 use crate::dom::eventtarget::EventTarget;
 use crate::dom::globalscope::GlobalScope;
-use crate::dom::headers::{extract_mime_type, is_forbidden_request_header};
+use crate::dom::headers::is_forbidden_request_header;
 use crate::dom::node::Node;
 use crate::dom::performanceresourcetiming::InitiatorType;
 use crate::dom::progressevent::ProgressEvent;
@ -1324,11 +1325,7 @@ impl XMLHttpRequest {
            return response;
        }
        // Step 2
-        let mime = self
-            .final_mime_type()
-            .as_ref()
-            .map(|m| normalize_type_string(&m.to_string()))
-            .unwrap_or("".to_owned());
+        let mime = normalize_type_string(&self.final_mime_type().to_string());

        // Step 3, 4
        let bytes = self.response.borrow().to_vec();
@ -1366,64 +1363,77 @@ impl XMLHttpRequest {
            return response;
        }

-        // Step 1
+        // Step 1: If xhr’s response’s body is null, then return.
        if self.response_status.get().is_err() {
            return None;
        }

-        // Step 2
-        let mime_type = self.final_mime_type();
-        // Step 5.3, 7
-        let charset = self.final_charset().unwrap_or(UTF_8);
-        let temp_doc: DomRoot<Document>;
-        match mime_type {
-            Some(ref mime) if mime.matches(TEXT, HTML) => {
-                // Step 4
-                if self.response_type.get() == XMLHttpRequestResponseType::_empty {
-                    return None;
-                } else {
-                    // TODO Step 5.2 "If charset is null, prescan the first 1024 bytes of xhr’s received bytes"
-                    // Step 5
-                    temp_doc = self.document_text_html(can_gc);
-                }
-            },
-            // Step 7
-            None => {
-                temp_doc = self.handle_xml(can_gc);
-                // Not sure it the parser should throw an error for this case
-                // The specification does not indicates this test,
-                // but for now we check the document has no child nodes
-                let has_no_child_nodes = temp_doc.upcast::<Node>().children().next().is_none();
-                if has_no_child_nodes {
-                    return None;
-                }
-            },
-            Some(ref mime)
-                if mime.matches(TEXT, XML) ||
-                    mime.matches(APPLICATION, XML) ||
-                    mime.has_suffix(XML) =>
-            {
-                temp_doc = self.handle_xml(can_gc);
-                // Not sure it the parser should throw an error for this case
-                // The specification does not indicates this test,
-                // but for now we check the document has no child nodes
-                let has_no_child_nodes = temp_doc.upcast::<Node>().children().next().is_none();
-                if has_no_child_nodes {
-                    return None;
-                }
-            },
-            // Step 3
-            _ => {
-                return None;
-            },
+        // Step 2: Let finalMIME be the result of get a final MIME type for xhr.
+        let final_mime = self.final_mime_type();
+
+        // Step 3: If finalMIME is not an HTML MIME type or an XML MIME type, then return.
+        let is_xml_mime_type = final_mime.matches(TEXT, XML) ||
+            final_mime.matches(APPLICATION, XML) ||
+            final_mime.has_suffix(XML);
+        if !final_mime.matches(TEXT, HTML) && !is_xml_mime_type {
+            return None;
        }
-        // Step 8
+
+        // Step 4: If xhr’s response type is the empty string and finalMIME is an HTML MIME
+        //         type, then return.
+        let charset;
+        let temp_doc;
+        if final_mime.matches(TEXT, HTML) {
+            if self.response_type.get() == XMLHttpRequestResponseType::_empty {
+                return None;
+            }
+
+            // Step 5: If finalMIME is an HTML MIME type, then:
+            // Step 5.1: Let charset be the result of get a final encoding for xhr.
+            // Step 5.2: If charset is null, prescan the first 1024 bytes of xhr’s received bytes
+            // and if that does not terminate unsuccessfully then let charset be the return value.
+            // TODO: This isn't happening right now.
+            // Step 5.3. If charset is null, then set charset to UTF-8.
+            charset = Some(self.final_charset().unwrap_or(UTF_8));
+
+            // Step 5.4: Let document be a document that represents the result parsing xhr’s
+            // received bytes following the rules set forth in the HTML Standard for an HTML parser
+            // with scripting disabled and a known definite encoding charset. [HTML]
+            temp_doc = self.document_text_html(can_gc);
+        } else {
+            assert!(is_xml_mime_type);
+
+            // Step 6: Otherwise, let document be a document that represents the result of running
+            // the XML parser with XML scripting support disabled on xhr’s received bytes. If that
+            // fails (unsupported character encoding, namespace well-formedness error, etc.), then
+            // return null. [HTML]
+            //
+            // TODO: The spec seems to suggest the charset should come from the XML parser here.
+            temp_doc = self.handle_xml(can_gc);
+            charset = self.final_charset();
+
+            // Not sure it the parser should throw an error for this case
+            // The specification does not indicates this test,
+            // but for now we check the document has no child nodes
+            let has_no_child_nodes = temp_doc.upcast::<Node>().children().next().is_none();
+            if has_no_child_nodes {
+                return None;
+            }
+        }
+
+        // Step 7: If charset is null, then set charset to UTF-8.
+        let charset = charset.unwrap_or(UTF_8);
+
+        // Step 8: Set document’s encoding to charset.
        temp_doc.set_encoding(charset);

-        // Step 9 to 11
-        // Done by handle_text_html and handle_xml
+        // Step 9: Set document’s content type to finalMIME.
+        // Step 10: Set document’s URL to xhr’s response’s URL.
+        // Step 11: Set document’s origin to xhr’s relevant settings object’s origin.
+        //
+        // Done by `handle_text_html()` and `handle_xml()`.

-        // Step 12
+        // Step 12: Set xhr’s response object to document.
        self.response_xml.set(Some(&temp_doc));
        self.response_xml.get()
    }
@ -1507,7 +1517,7 @@ impl XMLHttpRequest {
            Ok(parsed) => Some(parsed),
            Err(_) => None, // Step 7
        };
-        let content_type = self.final_mime_type();
+        let content_type = Some(self.final_mime_type());
        Document::new(
            win,
            HasBrowsingContext::No,
@ -1598,14 +1608,16 @@ impl XMLHttpRequest {
        // 3. If responseMIME’s parameters["charset"] exists, then set label to it.
        let response_charset = self
            .response_mime_type()
-            .and_then(|mime| mime.get_parameter(CHARSET).map(|c| c.to_string()));
+            .get_parameter(CHARSET)
+            .map(ToString::to_string);

        // 4. If xhr’s override MIME type’s parameters["charset"] exists, then set label to it.
        let override_charset = self
            .override_mime_type
            .borrow()
            .as_ref()
-            .and_then(|mime| mime.get_parameter(CHARSET).map(|c| c.to_string()));
+            .and_then(|mime| mime.get_parameter(CHARSET))
+            .map(ToString::to_string);

        // 5. If label is null, then return null.
        // 6. Let encoding be the result of getting an encoding from label.
@ -1617,23 +1629,22 @@ impl XMLHttpRequest {
    }

    /// <https://xhr.spec.whatwg.org/#response-mime-type>
-    fn response_mime_type(&self) -> Option<Mime> {
-        return extract_mime_type(&self.response_headers.borrow())
-            .and_then(|mime_as_bytes| {
-                String::from_utf8(mime_as_bytes)
-                    .unwrap_or_default()
-                    .parse()
-                    .ok()
-            })
-            .or(Some(Mime::new(TEXT, XML)));
+    fn response_mime_type(&self) -> Mime {
+        // 1. Let mimeType be the result of extracting a MIME type from xhr’s response’s
+        //    header list.
+        // 2. If mimeType is failure, then set mimeType to text/xml.
+        // 3. Return mimeType.
+        extract_mime_type_as_dataurl_mime(&self.response_headers.borrow())
+            .unwrap_or_else(|| Mime::new(TEXT, XML))
    }

    /// <https://xhr.spec.whatwg.org/#final-mime-type>
-    fn final_mime_type(&self) -> Option<Mime> {
-        match *self.override_mime_type.borrow() {
-            Some(ref override_mime) => Some(override_mime.clone()),
-            None => self.response_mime_type(),
-        }
+    fn final_mime_type(&self) -> Mime {
+        self.override_mime_type
+            .borrow()
+            .as_ref()
+            .map(MimeExt::clone)
+            .unwrap_or_else(|| self.response_mime_type())
    }
 }

--- a/components/script/stylesheet_loader.rs
+++ b/components/script/stylesheet_loader.rs
@ -163,7 +163,8 @@ impl FetchResponseListener for StylesheetContext {
                Some(meta) => meta,
                None => return,
            };
-            let is_css = metadata.content_type.is_some_and(|ct| {
+
+            let mut is_css = metadata.content_type.is_some_and(|ct| {
                let mime: Mime = ct.into_inner().into();
                mime.type_() == mime::TEXT && mime.subtype() == mime::CSS
            }) || (
@ -177,6 +178,17 @@ impl FetchResponseListener for StylesheetContext {
                    document.origin().immutable().clone() == metadata.final_url.origin()
            );

+            // From <https://html.spec.whatwg.org/multipage/#link-type-stylesheet>:
+            // > Quirk: If the document has been set to quirks mode, has the same origin as
+            // > the URL of the external resource, and the Content-Type metadata of the
+            // > external resource is not a supported style sheet type, the user agent must
+            // > instead assume it to be text/css.
+            if document.quirks_mode() == QuirksMode::Quirks &&
+                document.url().origin() == self.url.origin()
+            {
+                is_css = true;
+            }
+
            let data = if is_css {
                let data = std::mem::take(&mut self.data);
                self.unminify_css(data, metadata.final_url.clone())