Use data_url::Mime to parse the MIME Types

This commit follows the specification https://fetch.spec.whatwg.org/#concept-header-extract-mime-type
2025-07-23 07:13:52 +01:00 · 2020-10-29 18:03:22 +01:00 · 2020-10-29 18:03:22 +01:00 · 43b3d93aec
commit 43b3d93aec
parent 15a435a7ae
10 changed files with 147 additions and 60 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -4910,6 +4910,7 @@ dependencies = [
 "cookie",
 "crossbeam-channel",
 "cssparser",
 "data-url",
 "deny_public_fields",
 "devtools_traits",
 "dom_struct",
--- a/components/script/Cargo.toml
+++ b/components/script/Cargo.toml
@ -43,6 +43,7 @@ content-security-policy = { version = "0.4.0", features = ["serde"] }
 cookie = "0.11"
 crossbeam-channel = "0.4"
 cssparser = "0.27"
 data-url = "0.1.0"
 deny_public_fields = { path = "../deny_public_fields" }
 devtools_traits = { path = "../devtools_traits" }
 dom_struct = { path = "../dom_struct" }
--- a/components/script/dom/headers.rs
+++ b/components/script/dom/headers.rs
@ -10,8 +10,9 @@ use crate::dom::bindings::reflector::{reflect_dom_object, Reflector};
 use crate::dom::bindings::root::DomRoot;
 use crate::dom::bindings::str::{is_token, ByteString};
 use crate::dom::globalscope::GlobalScope;
 use data_url::mime::Mime as DataUrlMime;
 use dom_struct::dom_struct;
-use http::header::{self, HeaderMap as HyperHeaders, HeaderName, HeaderValue};
+use http::header::{HeaderMap as HyperHeaders, HeaderName, HeaderValue};
 use net_traits::request::is_cors_safelisted_request_header;
 use std::cell::Cell;
 use std::str::{self, FromStr};
@ -269,10 +270,7 @@ impl Headers {
    // https://fetch.spec.whatwg.org/#concept-header-extract-mime-type
    pub fn extract_mime_type(&self) -> Vec<u8> {
-        self.header_list
+        extract_mime_type(&*self.header_list.borrow()).unwrap_or(vec![])
            .borrow()
            .get(header::CONTENT_TYPE)
            .map_or(vec![], |v| v.as_bytes().to_owned())
    }
    pub fn sort_header_list(&self) -> Vec<(String, Vec<u8>)> {
@ -469,3 +467,72 @@ pub fn is_obs_text(x: u8) -> bool {
        _ => false,
    }
 }
 // Extract a MIME type from a header list, as described by
 // https://fetch.spec.whatwg.org/#concept-header-extract-mime-type
 //
 // Every `Content-Type` value is parsed with data_url::Mime, because
 // mime::Mime does not offer a parser that follows the Fetch spec
 // (see https://github.com/hyperium/mime/issues/106).
 //
 // Returns the serialized MIME type as bytes, or `None` when there is no
 // `Content-Type` header or none of its values yields a usable MIME type.
 pub fn extract_mime_type(headers: &HyperHeaders) -> Option<Vec<u8>> {
    // Steps 1 to 3: running state shared across all header values.
    let mut charset: Option<String> = None;
    let mut essence = String::new();
    let mut mime_type: Option<DataUrlMime> = None;

    // Step 4
    let headers_values = headers.get_all(http::header::CONTENT_TYPE).iter();

    // Step 5
    if headers_values.size_hint() == (0, Some(0)) {
        return None;
    }

    // Step 6
    for header_value in headers_values {
        // Step 6.1: a value that is not valid as a string is parsed as ""
        // Step 6.2: skip values that fail to parse
        let mut candidate = match DataUrlMime::from_str(header_value.to_str().unwrap_or("")) {
            Ok(parsed) => parsed,
            Err(_) => continue,
        };

        // Step 6.2: skip the wildcard essence as well
        let candidate_essence = format!("{}/{}", candidate.type_, candidate.subtype);
        if candidate_essence == "*/*" {
            continue;
        }

        // Copy the charset out so `candidate` is free to be mutated below.
        let candidate_charset = candidate.get_parameter("charset").map(|c| c.to_string());

        if candidate_essence != essence {
            // Step 6.4: a new essence replaces the remembered charset
            charset = candidate_charset;
            essence = candidate_essence;
        } else if candidate_charset.is_none() {
            // Step 6.5: same essence without a charset inherits the remembered one
            if let Some(ref inherited) = charset {
                candidate
                    .parameters
                    .push(("charset".to_string(), inherited.clone()));
            }
        }

        // Step 6.3: the latest usable value becomes the current MIME type
        mime_type = Some(candidate);
    }

    // Step 7, 8
    mime_type.map(|m| m.to_string().into_bytes())
 }
--- a/components/script/dom/xmlhttprequest.rs
+++ b/components/script/dom/xmlhttprequest.rs
@ -22,7 +22,7 @@ use crate::dom::document::{Document, HasBrowsingContext, IsHTMLDocument};
 use crate::dom::event::{Event, EventBubbles, EventCancelable};
 use crate::dom::eventtarget::EventTarget;
 use crate::dom::globalscope::GlobalScope;
-use crate::dom::headers::is_forbidden_header_name;
+use crate::dom::headers::{extract_mime_type, is_forbidden_header_name};
 use crate::dom::node::Node;
 use crate::dom::performanceresourcetiming::InitiatorType;
 use crate::dom::progressevent::ProgressEvent;
@ -1364,7 +1364,7 @@ impl XMLHttpRequest {
        // Caching: if we have existing response xml, redirect it directly
        let response = self.response_xml.get();
        if response.is_some() {
-            return self.response_xml.get();
+            return response;
        }
        // Step 1
@ -1372,41 +1372,59 @@ impl XMLHttpRequest {
            return None;
        }
        // Step 2
        let mime_type = self.final_mime_type();
-        // TODO: prescan the response to determine encoding if final charset is null
+        // Step 5.3, 7
        let charset = self.final_charset().unwrap_or(UTF_8);
        let temp_doc: DomRoot<Document>;
        match mime_type {
            Some(ref mime) if mime.type_() == mime::TEXT && mime.subtype() == mime::HTML => {
-                // Step 5
+                // Step 4
                if self.response_type.get() == XMLHttpRequestResponseType::_empty {
                    return None;
                } else {
-                    // Step 6
+                    // TODO Step 5.2 "If charset is null, prescan the first 1024 bytes of xhr’s received bytes"
                    // Step 5
                    temp_doc = self.document_text_html();
                }
            },
            // Step 7
            Some(ref mime)
                if (mime.type_() == mime::TEXT && mime.subtype() == mime::XML) ||
                    (mime.type_() == mime::APPLICATION && mime.subtype() == mime::XML) =>
            {
                temp_doc = self.handle_xml();
            }
            None => {
                temp_doc = self.handle_xml();
                // Not sure it the parser should throw an error for this case
                // The specification does not indicates this test,
                // but for now we check the document has no child nodes
                let has_no_child_nodes = temp_doc.upcast::<Node>().children().next().is_none();
                if has_no_child_nodes {
                    return None;
                }
            },
-            Some(ref mime) if mime.suffix() == Some(mime::XML) => {
+            Some(ref mime)
                if (mime.type_() == mime::TEXT && mime.subtype() == mime::XML) ||
                    (mime.type_() == mime::APPLICATION && mime.subtype() == mime::XML) ||
                    mime.suffix() == Some(mime::XML) =>
            {
                temp_doc = self.handle_xml();
-            },
+                // Not sure it the parser should throw an error for this case
-            // Step 4
+                // The specification does not indicates this test,
                // but for now we check the document has no child nodes
                let has_no_child_nodes = temp_doc.upcast::<Node>().children().next().is_none();
                if has_no_child_nodes {
                    return None;
                }
            }
            // Step 3
            _ => {
                return None;
            },
        }
-        // Step 9
+        // Step 8
        temp_doc.set_encoding(charset);
-        // Step 13
+
        // Step 9 to 11
        // Done by handle_text_html and handle_xml
        // Step 12
        self.response_xml.set(Some(&temp_doc));
        return self.response_xml.get();
    }
@ -1585,14 +1603,25 @@ impl XMLHttpRequest {
        }
    }
    /// <https://xhr.spec.whatwg.org/#response-mime-type>
    ///
    /// Extracts the MIME type from the response headers and parses it into a
    /// `Mime`. Falls back to `text/xml` when extraction or parsing yields
    /// nothing, so the result is always `Some`.
    fn response_mime_type(&self) -> Option<Mime> {
        let extracted = extract_mime_type(&self.response_headers.borrow());
        let parsed = extracted.and_then(|mime_as_bytes| {
            // Invalid UTF-8 becomes an empty string, which then fails to parse.
            String::from_utf8(mime_as_bytes)
                .unwrap_or_default()
                .parse::<Mime>()
                .ok()
        });
        parsed.or(Some(mime::TEXT_XML))
    }
    /// <https://xhr.spec.whatwg.org/#final-mime-type>
    fn final_mime_type(&self) -> Option<Mime> {
        if self.override_mime_type.borrow().is_some() {
            self.override_mime_type.borrow().clone()
        } else {
-            match self.response_headers.borrow().typed_get::<ContentType>() {
+            return self.response_mime_type();
                Some(ct) => Some(ct.into()),
                None => None,
            }
        }
    }
 }
--- a/tests/wpt/metadata/fetch/content-type/response.window.js.ini
+++ b/tests/wpt/metadata/fetch/content-type/response.window.js.ini
@ -96,9 +96,6 @@
  [Request: combined response Content-Type: text/plain;charset=gbk text/plain]
    expected: NOTRUN
  [fetch(): separate response Content-Type:  text/plain]
    expected: FAIL
  [fetch(): combined response Content-Type: text/html;" \\" text/plain ";charset=GBK]
    expected: NOTRUN
@ -312,21 +309,6 @@
  [fetch(): separate response Content-Type: text/plain ]
    expected: NOTRUN
  [<iframe>: separate response Content-Type: text/plain */*;charset=gbk]
    expected: FAIL
  [<iframe>: separate response Content-Type: text/html */*]
    expected: FAIL
  [<iframe>: separate response Content-Type: text/html;x=" text/plain]
    expected: FAIL
  [<iframe>: combined response Content-Type: text/html;" \\" text/plain]
    expected: FAIL
  [<iframe>: separate response Content-Type: text/html;" \\" text/plain]
    expected: FAIL
  [<iframe>: combined response Content-Type: text/html;x=" text/plain]
    expected: FAIL
--- a/tests/wpt/metadata/wasm/webapi/contenttype.any.js.ini
+++ b/tests/wpt/metadata/wasm/webapi/contenttype.any.js.ini
@ -1,4 +1,16 @@
 [contenttype.any.worker.html]
  [Response with Content-Type "application/wasm;": compileStreaming]
    expected: FAIL
  [Response with Content-Type "application/wasm;": instantiateStreaming]
    expected: FAIL
  [Response with Content-Type "application/wasm;x": compileStreaming]
    expected: FAIL
  [Response with Content-Type "application/wasm;x": instantiateStreaming]
    expected: FAIL
 [contenttype.any.sharedworker.html]
  expected: ERROR
@ -7,6 +19,18 @@
 [contenttype.any.html]
  [Response with Content-Type "application/wasm;": compileStreaming]
    expected: FAIL
  [Response with Content-Type "application/wasm;": instantiateStreaming]
    expected: FAIL
  [Response with Content-Type "application/wasm;x": compileStreaming]
    expected: FAIL
  [Response with Content-Type "application/wasm;x": instantiateStreaming]
    expected: FAIL
 [contenttype.any.serviceworker.html]
  expected: ERROR
--- a/tests/wpt/metadata/xhr/overridemimetype-blob.html.ini
+++ b/tests/wpt/metadata/xhr/overridemimetype-blob.html.ini
@ -1,10 +1,5 @@
 [overridemimetype-blob.html]
  type: testharness
  [Use text/xml as fallback MIME type]
    expected: FAIL
  [Use text/xml as fallback MIME type, 2]
    expected: FAIL
  [Bogus MIME type should end up as application/octet-stream]
    expected: FAIL
--- a/tests/wpt/metadata/xhr/responsexml-basic.htm.ini
+++ b/tests/wpt/metadata/xhr/responsexml-basic.htm.ini
@ -1,5 +0,0 @@
 [responsexml-basic.htm]
  type: testharness
  [responseXML on empty response documents]
    expected: FAIL
--- a/tests/wpt/metadata/xhr/responsexml-media-type.htm.ini
+++ b/tests/wpt/metadata/xhr/responsexml-media-type.htm.ini
@ -1,4 +0,0 @@
 [responsexml-media-type.htm]
  [XMLHttpRequest: responseXML MIME type tests ('text/plain;+xml', should not parse)]
    expected: FAIL
--- a/tests/wpt/metadata/xhr/responsexml-non-well-formed.htm.ini
+++ b/tests/wpt/metadata/xhr/responsexml-non-well-formed.htm.ini
@ -1,8 +1,5 @@
 [responsexml-non-well-formed.htm]
  type: testharness
  [XMLHttpRequest: responseXML non well-formed tests]
    expected: FAIL
  [XMLHttpRequest: responseXML non well-formed tests 1]
    expected: FAIL